# app.py — P2-ETF-DQN-ENGINE Streamlit UI import json import os import shutil from datetime import datetime, date, timedelta, timezone import numpy as np import pandas as pd import plotly.graph_objects as go import requests as req import streamlit as st import config SWEEP_YEARS = [2008, 2013, 2015, 2017, 2019, 2021] WORKFLOW_FILE = "train_models.yml" ETF_COLORS = { "TLT": "#4e79a7", "VCIT": "#f28e2b", "LQD": "#59a14f", "HYG": "#e15759", "VNQ": "#76b7b2", "SLV": "#edc948", "GLD": "#b07aa1", "CASH": "#aaaaaa", } # ── Page config ─────────────────────────────────────────────────────────────── st.set_page_config( page_title="P2 ETF DQN Engine", layout="wide", initial_sidebar_state="expanded", ) st.markdown(""" """, unsafe_allow_html=True) # ── Helpers ─────────────────────────────────────────────────────────────────── def _load_json(path: str) -> dict: if os.path.exists(path): with open(path) as f: return json.load(f) return {} def _next_trading_day() -> date: US_HOLIDAYS = { date(2025,1,1), date(2025,1,20), date(2025,2,17), date(2025,4,18), date(2025,5,26), date(2025,6,19), date(2025,7,4), date(2025,9,1), date(2025,11,27), date(2025,12,25), date(2026,1,1), date(2026,1,19), date(2026,2,16), date(2026,4,3), date(2026,5,25), date(2026,6,19), date(2026,7,3), date(2026,9,7), date(2026,11,26), date(2026,12,25), } now_est = datetime.utcnow() - timedelta(hours=5) today = now_est.date() if today.weekday() < 5 and today not in US_HOLIDAYS and now_est.hour < 16: return today d = today + timedelta(days=1) while d.weekday() >= 5 or d in US_HOLIDAYS: d += timedelta(days=1) return d def _trigger_github(start_year: int, fee_bps: int, tsl_pct: float, z_reentry: float, sweep_mode: str = "") -> bool: try: token = os.getenv("GITHUB_TOKEN", "") if not token: st.error("❌ GITHUB_TOKEN not found in Space secrets.") return False url = f"https://api.github.com/repos/{config.GITHUB_REPO}/actions/workflows/{WORKFLOW_FILE}/dispatches" resp = req.post(url, headers={"Authorization": f"token 
{token}", "Accept": "application/vnd.github+json"}, json={"ref": "main", "inputs": { "start_year": str(start_year), "fee_bps": str(fee_bps), "tsl_pct": str(tsl_pct), "z_reentry": str(z_reentry), "sweep_mode": sweep_mode, }}, timeout=10, ) if resp.status_code != 204: st.error(f"❌ GitHub API returned HTTP {resp.status_code} — {resp.text[:300]}") return resp.status_code == 204 except Exception as e: st.error(f"❌ Exception: {str(e)}") return False def _get_latest_workflow_run() -> dict: try: token = os.getenv("GITHUB_TOKEN", "") if not token: return {} url = f"https://api.github.com/repos/{config.GITHUB_REPO}/actions/workflows/{WORKFLOW_FILE}/runs?per_page=1" r = req.get(url, headers={"Authorization": f"token {token}", "Accept": "application/vnd.github+json"}, timeout=10) if r.status_code == 200: runs = r.json().get("workflow_runs", []) return runs[0] if runs else {} except Exception: pass return {} def _today_est() -> date: return (datetime.now(timezone.utc) - timedelta(hours=5)).date() def _sweep_filename(year: int, for_date: date) -> str: return f"sweep_{year}_{for_date.strftime('%Y%m%d')}.json" def _load_sweep_cache(for_date: date) -> dict: """Load date-stamped sweep files from HF Dataset.""" cache = {} try: from huggingface_hub import hf_hub_download token = os.getenv("HF_TOKEN") repo_id = os.getenv("HF_DATASET_REPO", "P2SAMAPA/P2-ETF-DQN-ENGINE-DATASET") date_tag = for_date.strftime("%Y%m%d") for yr in SWEEP_YEARS: fname = f"sweep_{yr}_{date_tag}.json" try: path = hf_hub_download(repo_id=repo_id, filename=f"sweep/{fname}", repo_type="dataset", token=token, force_download=True) with open(path) as f: cache[yr] = json.load(f) except Exception: pass except Exception: pass return cache def _load_sweep_cache_any() -> tuple: """Load most recent sweep files from HF Dataset regardless of date. 
Returns (cache, date).""" found, best_date = {}, None try: from huggingface_hub import HfApi, hf_hub_download token = os.getenv("HF_TOKEN") repo_id = os.getenv("HF_DATASET_REPO", "P2SAMAPA/P2-ETF-DQN-ENGINE-DATASET") api = HfApi() files = list(api.list_repo_files(repo_id=repo_id, repo_type="dataset", token=token)) # Find most recent date across all sweep files for fname in files: fname = os.path.basename(fname) if fname.startswith("sweep_") and fname.endswith(".json"): parts = fname.replace(".json","").split("_") if len(parts) == 3: try: dt = datetime.strptime(parts[2], "%Y%m%d").date() if best_date is None or dt > best_date: best_date = dt except Exception: pass if best_date: date_tag = best_date.strftime("%Y%m%d") for yr in SWEEP_YEARS: fname = f"sweep_{yr}_{date_tag}.json" try: path = hf_hub_download(repo_id=repo_id, filename=f"sweep/{fname}", repo_type="dataset", token=token, force_download=True) with open(path) as f: found[yr] = json.load(f) except Exception: pass except Exception: pass return found, best_date def _compute_consensus(sweep_data: dict) -> dict: """40% Return · 20% Z · 20% Sharpe · 20% (-MaxDD), min-max normalised.""" rows = [] for yr, sig in sweep_data.items(): rows.append({ "year": yr, "signal": sig.get("signal", "?"), "ann_return": sig.get("ann_return", 0.0), "z_score": sig.get("z_score", 0.0), "sharpe": sig.get("sharpe", 0.0), "max_dd": sig.get("max_dd", 0.0), "conviction": sig.get("conviction", "?"), "lookback": sig.get("lookback", "?"), }) if not rows: return {} df = pd.DataFrame(rows) def _mm(s): mn, mx = s.min(), s.max() return (s - mn) / (mx - mn + 1e-9) df["n_ret"] = _mm(df["ann_return"]) df["n_z"] = _mm(df["z_score"]) df["n_sharpe"] = _mm(df["sharpe"]) df["n_negdd"] = _mm(-df["max_dd"]) df["wtd"] = 0.40*df["n_ret"] + 0.20*df["n_z"] + 0.20*df["n_sharpe"] + 0.20*df["n_negdd"] etf_agg = {} for _, row in df.iterrows(): e = row["signal"] etf_agg.setdefault(e, {"years": [], "scores": [], "returns": [], "zs": [], "sharpes": [], "dds": []}) 
etf_agg[e]["years"].append(row["year"]) etf_agg[e]["scores"].append(row["wtd"]) etf_agg[e]["returns"].append(row["ann_return"]) etf_agg[e]["zs"].append(row["z_score"]) etf_agg[e]["sharpes"].append(row["sharpe"]) etf_agg[e]["dds"].append(row["max_dd"]) total = sum(sum(v["scores"]) for v in etf_agg.values()) + 1e-9 summary = {} for e, v in etf_agg.items(): cs = sum(v["scores"]) summary[e] = { "cum_score": round(cs, 4), "score_share": round(cs / total, 3), "n_years": len(v["years"]), "years": v["years"], "avg_return": round(float(np.mean(v["returns"])), 4), "avg_z": round(float(np.mean(v["zs"])), 3), "avg_sharpe": round(float(np.mean(v["sharpes"])), 3), "avg_max_dd": round(float(np.mean(v["dds"])), 4), } winner = max(summary, key=lambda e: summary[e]["cum_score"]) return {"winner": winner, "etf_summary": summary, "per_year": df.to_dict("records"), "n_years": len(rows)} # ── Sidebar ─────────────────────────────────────────────────────────────────── with st.sidebar: st.markdown("## ⚙️ Configuration") if st.button("🔄 Refresh Data & Clear Cache"): st.cache_data.clear() st.toast("Cache cleared — reloading...") st.rerun() st.divider() st.markdown("### 📅 Training Parameters") st.caption("Changes here require retraining via GitHub Actions.") start_year = st.slider("Start Year", config.START_YEAR_MIN if hasattr(config, "START_YEAR_MIN") else 2008, 2024, 2015) fee_bps = st.number_input("T-Costs (bps)", 0, 50, 10) st.divider() st.markdown("### 🛡️ Risk Controls") st.caption("Instant — no retraining needed.") tsl_pct = st.slider("Trailing Stop Loss (%)", 5.0, 25.0, 10.0, 0.5) z_reentry = st.slider("Re-entry Z-Score Threshold", 0.5, 3.0, 1.1, 0.1) st.divider() run_btn = st.button("🚀 Retrain DQN Agent", help="Triggers GitHub Actions training job", use_container_width=True) if run_btn: triggered = _trigger_github(start_year, fee_bps, tsl_pct, z_reentry, sweep_mode="") if triggered: st.success( f"✅ Training triggered!\n\n" f"Training from **{start_year}** · 200 episodes · " 
f"**{fee_bps}bps** fees\n\n" f"Results update here in ~50–65 min." ) else: st.warning( "⚠️ Could not trigger GitHub Actions automatically.\n\n" f"**Manual steps:**\n" f"- Go to GitHub → Actions → Train DQN Agent\n" f"- Set `start_year = {start_year}`\n" f"- Or add `GITHUB_TOKEN` to HF Space secrets." ) st.caption(f"↑ Trains from {start_year} onwards · 200 episodes (hardcoded in train_models.yml)") # ── Load outputs ────────────────────────────────────────────────────────────── pred = _load_json("latest_prediction.json") evalu = _load_json("evaluation_results.json") next_td = _next_trading_day() final_signal = pred.get("final_signal", "—") z_score = pred.get("z_score", 0.0) confidence = pred.get("confidence", pred.get("final_confidence", 0.0)) tsl_stat = pred.get("tsl_status", {}) tbill_rt = pred.get("tbill_rate", 3.6) probs = pred.get("probabilities", {}) q_vals = pred.get("q_values", {}) trained_from_year= pred.get("trained_from_year") trained_at = pred.get("trained_at") in_cash = tsl_stat.get("in_cash", False) tsl_triggered = tsl_stat.get("tsl_triggered", False) two_day_ret = tsl_stat.get("two_day_cumul_pct", 0.0) # ── Header ──────────────────────────────────────────────────────────────────── st.title("🤖 P2 ETF DQN Engine") st.caption("Dueling Deep Q-Network · Multi-Asset ETF Selection · arXiv:2411.07585") # ── Check latest workflow run status ───────────────────────────────────────── latest_run = _get_latest_workflow_run() is_training = latest_run.get("status") in ("queued", "in_progress") run_started = latest_run.get("created_at", "")[:16].replace("T", " ") if latest_run else "" # ── Tabs ────────────────────────────────────────────────────────────────────── tab1, tab2 = st.tabs(["📊 Single-Year Results", "🔄 Multi-Year Consensus Sweep"]) # ═══════════════════════════════════════════════════════════════════════════════ # TAB 1 — Single-Year Results (existing content) # ═══════════════════════════════════════════════════════════════════════════════ with tab1: # 
── Provenance banner ───────────────────────────────────────────────────────── if trained_from_year and trained_at: trained_date = trained_at[:10] st.markdown( f'
📋 Active model trained from ' f'{trained_from_year} · Generated {trained_date} · ' f'Val Sharpe {evalu.get("sharpe", "—")}
', unsafe_allow_html=True ) else: st.info("⚠️ No trained model found. Click **🚀 Retrain DQN Agent** to train.") st.markdown("---") # ── TSL override banner ─────────────────────────────────────────────────────── if tsl_triggered: st.markdown(f"""
🔴 TRAILING STOP LOSS TRIGGERED — 2-day return ({float(two_day_ret):+.1f}%) breached −{tsl_pct:.0f}% threshold. Holding CASH @ {tbill_rt:.2f}% T-bill until Z ≥ {z_reentry:.1f}σ.
""", unsafe_allow_html=True) # ── Signal Hero Card ────────────────────────────────────────────────────────── now_est = datetime.utcnow() - timedelta(hours=5) is_today = (next_td == now_est.date()) td_label = "TODAY'S SIGNAL" if is_today else "NEXT TRADING DAY" if in_cash or not pred: st.markdown(f"""
⚠️ Risk Override Active · {td_label}
💵 CASH
Earning 3m T-bill: {tbill_rt:.2f}% p.a.  |  Re-entry when Z ≥ {z_reentry:.1f}σ
""", unsafe_allow_html=True) else: prov_str = "" if trained_from_year and trained_at: prov_str = (f"📋 Trained from {trained_from_year} · " f"Generated {trained_at[:10]} · Z-Score {z_score:.2f}σ") st.markdown(f"""
Dueling DQN · {td_label}
{final_signal}
🎯 {next_td}  |  Confidence {float(confidence):.1%}  |  Z-Score {float(z_score):.2f}σ
{"
" + prov_str + "
" if prov_str else ""}
""", unsafe_allow_html=True) # ── Key Metrics ─────────────────────────────────────────────────────────────── if evalu: c1, c2, c3, c4, c5 = st.columns(5) c1.metric("Ann. Return", f"{evalu.get('ann_return', 0):.1%}") c2.metric("Sharpe Ratio", f"{evalu.get('sharpe', 0):.2f}") c3.metric("Max Drawdown", f"{evalu.get('max_drawdown', 0):.1%}") c4.metric("Calmar Ratio", f"{evalu.get('calmar', 0):.2f}") c5.metric("Hit Ratio", f"{evalu.get('hit_ratio', 0):.1%}") # Benchmark comparison — ann return bench_ann = evalu.get("benchmark_ann", {}) if bench_ann: bc1, bc2 = st.columns(2) strat_ann = evalu.get("ann_return", 0) for col, (k, v) in zip([bc1, bc2], bench_ann.items()): delta = strat_ann - v col.metric(f"{k} Ann. Return", f"{v:.1%}", delta=f"{delta:+.1%} vs strategy") st.markdown("---") # ── Q-Value / Probability Bar Chart ────────────────────────────────────────── if probs: st.subheader("📊 Action Probabilities (Softmax Q-Values)") actions = list(probs.keys()) values = [probs[a] for a in actions] colours = ["#cc6600" if a == "CASH" else "#0066cc" if a == final_signal else "#6c757d" for a in actions] fig = go.Figure(go.Bar( x=actions, y=values, marker_color=colours, text=[f"{v:.1%}" for v in values], textposition="outside", )) fig.update_layout( paper_bgcolor="#ffffff", plot_bgcolor="#ffffff", font_color="#1a1a1a", yaxis_title="Probability", xaxis_title="Action", height=300, margin=dict(t=20, b=20), yaxis=dict(gridcolor="#e9ecef"), ) st.plotly_chart(fig, use_container_width=True) st.caption( "**How to read:** Each bar is the agent's probability of choosing that action today, " "derived from softmax of the DQN Q-values. 🔵 Blue = chosen action. 🟠 Orange = CASH. " "Grey = rejected. A dominant bar = high conviction. Similar-height bars = low conviction / uncertain signal." 
) # ── Equity Curve ────────────────────────────────────────────────────────────── if evalu and "equity_curve" in evalu: st.subheader("📈 Test-Set Equity Curve vs Benchmarks") st.caption("Normalised to 1.0 at start of test period. SPY and AGG shown for comparison.") equity = evalu["equity_curve"] test_dates = evalu.get("test_dates", []) x_axis = test_dates if len(test_dates) == len(equity) else list(range(len(equity))) fig2 = go.Figure() fig2.add_trace(go.Scatter( x=x_axis, y=equity, mode="lines", name="DQN Strategy", line=dict(color="#0066cc", width=2.5), )) # SPY and AGG from json — no load_local needed bench_equity = evalu.get("benchmark_equity", {}) bench_colours = {"SPY": "#e63946", "AGG": "#2a9d8f"} for b, beq in bench_equity.items(): bx = test_dates if len(test_dates) == len(beq) else list(range(len(beq))) fig2.add_trace(go.Scatter( x=bx, y=beq, mode="lines", name=b, line=dict(width=1.5, dash="dot", color=bench_colours.get(b, "#888888")), )) fig2.update_layout( paper_bgcolor="#ffffff", plot_bgcolor="#ffffff", font_color="#1a1a1a", height=420, yaxis_title="Normalised Equity (start = 1.0)", xaxis_title="Date", legend=dict(bgcolor="#f8f9fa", orientation="h", yanchor="bottom", y=1.02, xanchor="left", x=0), yaxis=dict(gridcolor="#e9ecef", tickformat=".2f"), xaxis=dict(tickangle=-45, nticks=12, gridcolor="#e9ecef"), margin=dict(t=40, b=60), ) st.plotly_chart(fig2, use_container_width=True) # ── Allocation Breakdown ────────────────────────────────────────────────────── if evalu and "allocation_pct" in evalu: st.subheader("📊 Allocation Breakdown (Test Set)") alloc = evalu["allocation_pct"] fig3 = go.Figure(go.Pie( labels=list(alloc.keys()), values=list(alloc.values()), hole=0.45, marker_colors=["#0066cc","#28a745","#ffc107","#fd7e14", "#6f42c1","#e83e8c","#17a2b8","#adb5bd"], )) fig3.update_layout( paper_bgcolor="#ffffff", font_color="#1a1a1a", height=320, margin=dict(t=20), ) st.plotly_chart(fig3, use_container_width=True) st.caption( "**How to read:** Percentage 
of out-of-sample (OOS) test days the agent held each position. " "A well-trained agent rotates across ETFs. Heavy concentration in one slice (80%+) = " "agent defaulted to a single-asset strategy — sign of poor learning." ) st.markdown("---") # ── 15-Day Audit Trail ──────────────────────────────────────────────────────── if evalu and "allocations" in evalu and len(evalu["allocations"]) > 0: st.subheader("🗓️ 15-Day Audit Trail — Most Recent OOS Days") st.caption("Last 15 trading days from the out-of-sample test period.") allocs = evalu["allocations"] eq_curve = evalu.get("equity_curve", []) test_dates = evalu.get("test_dates", []) daily_rets = [] if len(eq_curve) > 1: for i in range(1, len(eq_curve)): daily_rets.append(eq_curve[i] / eq_curve[i-1] - 1) n_show = min(15, len(allocs)) last_allocs = allocs[-n_show:] last_rets = daily_rets[-n_show:] if daily_rets else [0.0] * n_show last_equity = eq_curve[-n_show:] if eq_curve else [1.0] * n_show last_dates = test_dates[-n_show:] if len(test_dates) >= n_show else [f"Day {i+1}" for i in range(n_show)] audit_df = pd.DataFrame({ "Date" : last_dates, "Allocation": last_allocs, "Daily Ret" : last_rets, "Equity" : last_equity, }) def _colour_ret(val): color = "#d93025" if val < 0 else "#188038" return f"color: {color}; font-weight: bold" styled = ( audit_df.style .format({"Daily Ret": "{:+.2%}", "Equity": "{:.4f}"}) .applymap(_colour_ret, subset=["Daily Ret"]) .set_properties(**{"text-align": "center"}) .hide(axis="index") ) st.dataframe(styled, use_container_width=True) st.markdown("---") # ── Methodology Section ─────────────────────────────────────────────────────── st.subheader("🧠 Methodology") st.markdown("""

Reinforcement Learning Framework — Dueling DQN

This engine implements a Dueling Deep Q-Network (Dueling DQN) for daily ETF selection, directly extending the RL framework proposed by Yasin & Gill (2024), "Reinforcement Learning Framework for Quantitative Trading", presented at the ICAIF 2024 FM4TS Workshop (arXiv:2411.07585).

From the Paper → Our Implementation

The paper benchmarks DQN, PPO, and A2C agents on single-stock buy/sell decisions using 20 technical indicators, finding that DQN with MLP policy significantly outperforms policy-gradient methods (PPO, A2C) on daily financial time-series, and that higher learning rates (lr = 0.001) produce the most profitable signals.

We extend this methodology in three key ways:

  1. Multi-Asset Action Space: Rather than binary buy/sell on a single asset, the agent selects from 8 discrete actions — CASH or one of 7 ETFs (TLT, VCIT, LQD, HYG, VNQ, GLD, SLV). This is fundamentally a harder problem than the paper's setup, requiring the agent to learn relative value across assets.
  2. Dueling Architecture (Wang et al., 2016): We replace the paper's standard DQN with a Dueling DQN, which separates the Q-function into a state-value stream V(s) and an advantage stream A(s,a):
    Q(s,a) = V(s) + A(s,a) − mean_a(A(s,a))
    This is specifically more effective for multi-action spaces because it explicitly learns which state is valuable independent of which action to take — critical when TLT and VCIT have similar Q-values in a rate-falling regime.
  3. Macro State Augmentation: The paper's state space uses only price-derived technical indicators. We add six FRED macro signals to the state: VIX, T10Y2Y (yield curve slope), TBILL_3M, DXY, Corp Spread, and HY Spread. These directly encode the macro regime that drives fixed-income and credit ETF selection.
State Space (per trading day)

20 technical indicators per ETF × 7 ETFs + 6 macro signals (+ z-scored variants), all computed over a rolling 20-day lookback window. The flattened window is fed to the DQN as a single state vector. Indicators follow the paper exactly: RSI(14), MACD(12/26/9), Stochastic(14), CCI(20), ROC(10), CMO(14), Williams%R, ATR, Bollinger %B + Width, StochRSI, Ultimate Oscillator, Momentum(10), rolling returns at 1/5/10/21d, and 21d realised volatility.

Reward Function

Reward = excess daily return over 3m T-bill, minus transaction cost on switches, scaled by inverse 21d realised volatility to penalise drawdown-prone positions. This replaces the paper's raw P&L reward with a risk-adjusted signal aligned with Sharpe Ratio maximisation.

Training

Data split is 80/10/10 (train/val/test) from the user-selected start year to present. Best weights are saved by validation-set Sharpe Ratio. The agent uses Double DQN (online network selects action, frozen target network evaluates) to reduce Q-value overestimation — a known instability in financial RL applications. Experience replay buffer of 100k transitions; hard target network update every 500 steps; ε-greedy exploration decaying from 1.0 → 0.05 over the first 50% of training.

Risk Controls

A post-signal Trailing Stop Loss overrides the DQN signal to CASH if the 2-day cumulative return of the held ETF breaches the configured threshold. Re-entry from CASH requires the DQN's best-action Z-score to clear the re-entry threshold, ensuring the model has recovered conviction before re-entering risk.

""", unsafe_allow_html=True) # ── Reference ───────────────────────────────────────────────────────────────── st.markdown("""
Reference: Yasin, A.S. & Gill, P.S. (2024). Reinforcement Learning Framework for Quantitative Trading. arXiv:2411.07585 [q-fin.TR]. Accepted at ICAIF 2024 FM4TS Workshop.  ·  Dueling DQN: Wang, Z. et al. (2016). Dueling Network Architectures for Deep Reinforcement Learning. ICML 2016.
""", unsafe_allow_html=True) # ═══════════════════════════════════════════════════════════════════════════════ # TAB 2 — Multi-Year Consensus Sweep # ═══════════════════════════════════════════════════════════════════════════════ with tab2: st.subheader("🔄 Multi-Year Consensus Sweep") st.markdown( f"Runs the DQN agent across **{len(SWEEP_YEARS)} start years** and aggregates signals " f"into a weighted consensus vote. \n" f"**Sweep years:** {', '.join(str(y) for y in SWEEP_YEARS)}  ·  " f"**Score:** 40% Return · 20% Z · 20% Sharpe · 20% (–MaxDD) \n" f"Auto-runs daily at **8pm EST**. Results are date-stamped — stale cache never shown." ) today_est = _today_est() # ── Load today's cache ──────────────────────────────────────────────────── today_cache = _load_sweep_cache(today_est) prev_cache, prev_date = _load_sweep_cache_any() # Separate prev from today if prev_date == today_est: prev_cache, prev_date = {}, None n_today = len(today_cache) n_total = len(SWEEP_YEARS) sweep_done = n_today == n_total sweep_partial = 0 < n_today < n_total # ── Training-in-progress banner ─────────────────────────────────────────── if is_training: st.warning( f"⏳ **Training in progress** (started {run_started} UTC) — " f"{n_today}/{n_total} years complete today. " f"Showing previous day's results below where available.", icon="🔄" ) # ── Date stamp warning if showing previous day ──────────────────────────── display_cache = today_cache if today_cache else prev_cache display_date = today_est if today_cache else prev_date if display_cache and display_date and display_date < today_est: st.warning( f"⚠️ Showing results from **{display_date}** (previous day). 
" f"Today's sweep has not run yet — it will auto-trigger at 8pm EST.", icon="📅" ) # ── Year status grid ────────────────────────────────────────────────────── cols = st.columns(n_total) for i, yr in enumerate(SWEEP_YEARS): with cols[i]: today_has = yr in today_cache prev_has = yr in prev_cache if today_has: sig = today_cache[yr].get("signal", "?") st.success(f"**{yr}**\n✅ {sig}") elif is_training and prev_has: sig = prev_cache[yr].get("signal", "?") st.info(f"**{yr}**\n⏳ {sig}*") else: st.error(f"**{yr}**\n⏳ Not run") if is_training: st.caption("\\* = previous day's result shown while today's training is in progress") st.divider() # ── Manual sweep button ─────────────────────────────────────────────────── missing_today = [yr for yr in SWEEP_YEARS if yr not in today_cache] force_rerun = st.checkbox("🔄 Force re-run all years", value=False, help="Re-trains even if today's results already exist") trigger_years = SWEEP_YEARS if force_rerun else missing_today col_btn, col_info = st.columns([1, 3]) with col_btn: sweep_btn = st.button( "🚀 Run Consensus Sweep", type="primary", use_container_width=True, disabled=(is_training or (sweep_done and not force_rerun)), help="Triggers parallel GitHub Actions jobs for missing years" ) with col_info: if sweep_done and not force_rerun: st.success(f"✅ Today's sweep complete ({today_est}) — {n_total}/{n_total} years ready") elif is_training: st.warning(f"⏳ Training in progress... ({n_today}/{n_total} done today)") else: st.info( f"**{n_today}/{n_total}** years done for today ({today_est}). 
\n" f"Will trigger **{len(trigger_years)}** parallel jobs: " f"{', '.join(str(y) for y in trigger_years)}" ) if sweep_btn and trigger_years: sweep_str = ",".join(str(y) for y in trigger_years) with st.spinner(f"🚀 Triggering sweep for {sweep_str}..."): ok = _trigger_github( start_year=trigger_years[0], fee_bps=fee_bps, tsl_pct=tsl_pct, z_reentry=z_reentry, sweep_mode=sweep_str ) if ok: st.success( f"✅ Triggered **{len(trigger_years)}** parallel jobs for: {sweep_str}. \n" f"Each takes ~90 mins. Refresh when complete." ) else: st.error("❌ Failed to trigger GitHub Actions sweep.") # ── Show consensus ──────────────────────────────────────────────────────── if not display_cache: st.info("👆 No sweep results yet. Click **🚀 Run Consensus Sweep** or wait for 8pm EST auto-run.") st.stop() consensus = _compute_consensus(display_cache) if not consensus: st.warning("⚠️ Could not compute consensus.") st.stop() winner = consensus["winner"] w_info = consensus["etf_summary"][winner] win_color = ETF_COLORS.get(winner, "#0066cc") score_pct = w_info["score_share"] * 100 score_pct = score_pct if (score_pct == score_pct) else 0.0 # guard nan split_sig = w_info["score_share"] < 0.40 sig_label = "⚠️ Split Signal" if split_sig else "✅ Clear Signal" note = f"Score share {score_pct:.0f}% · {w_info['n_years']}/{n_total} years · avg score {w_info['cum_score']:.4f}" date_note = f"Results from: {display_date}" # ── Winner banner ───────────────────────────────────────────────────────── st.markdown(f"""
WEIGHTED CONSENSUS · DQN · {len(display_cache)} START YEARS · {date_note}
{winner}
{sig_label} · {note}
Avg Return
{w_info['avg_return']*100:.1f}%
Avg Z
{w_info['avg_z']:.2f}σ
Avg Sharpe
{w_info['avg_sharpe']:.2f}
Avg MaxDD
{w_info['avg_max_dd']*100:.1f}%
""", unsafe_allow_html=True) # Also-ranked others = sorted([(e, v) for e, v in consensus["etf_summary"].items() if e != winner], key=lambda x: -x[1]["cum_score"]) parts = [] for etf, v in others: c = ETF_COLORS.get(etf, "#888") parts.append(f'{etf} ' f'({v["cum_score"]:.2f} · {v["n_years"]}yr)') st.markdown( '
' 'Also ranked: ' + "  |  ".join(parts) + '
', unsafe_allow_html=True ) st.divider() # ── Charts ──────────────────────────────────────────────────────────────── c1, c2 = st.columns(2) with c1: st.markdown("**Weighted Score per ETF**") es = consensus["etf_summary"] sorted_etfs = sorted(es.keys(), key=lambda e: -es[e]["cum_score"]) fig_bar = go.Figure(go.Bar( x=sorted_etfs, y=[es[e]["cum_score"] for e in sorted_etfs], marker_color=[ETF_COLORS.get(e, "#888") for e in sorted_etfs], text=[f"{es[e]['n_years']}yr · {es[e]['score_share']*100 if es[e]['score_share']==es[e]['score_share'] else 0:.0f}%
{es[e]['cum_score']:.2f}" for e in sorted_etfs], textposition="outside", )) fig_bar.update_layout( template="plotly_dark", height=360, yaxis_title="Cumulative Weighted Score", showlegend=False, margin=dict(t=20, b=20) ) st.plotly_chart(fig_bar, use_container_width=True) with c2: st.markdown("**Conviction Z-Score by Start Year**") per_year = consensus["per_year"] fig_sc = go.Figure() for row in per_year: etf = row["signal"] col = ETF_COLORS.get(etf, "#888") fig_sc.add_trace(go.Scatter( x=[row["year"]], y=[row["z_score"]], mode="markers+text", marker=dict(size=18, color=col, line=dict(color="white", width=1)), text=[etf], textposition="top center", name=etf, showlegend=False, hovertemplate=f"{etf}
Year: {row['year']}
" f"Z: {row['z_score']:.2f}σ
" f"Return: {row['ann_return']*100:.1f}%" )) fig_sc.add_hline(y=0, line_dash="dot", line_color="rgba(255,255,255,0.3)", annotation_text="Neutral") fig_sc.update_layout( template="plotly_dark", height=360, xaxis_title="Start Year", yaxis_title="Z-Score (σ)", margin=dict(t=20, b=20) ) st.plotly_chart(fig_sc, use_container_width=True) # ── Per-year breakdown table ────────────────────────────────────────────── st.subheader("📋 Full Per-Year Breakdown") st.caption( "**Wtd Score** = 40% Ann. Return + 20% Z-Score + 20% Sharpe + 20% (–Max DD), " "min-max normalised across years. " f"Results dated: **{display_date}**" ) tbl_rows = [] for row in sorted(per_year, key=lambda r: r["year"]): cached_today = row["year"] in today_cache tbl_rows.append({ "Start Year": row["year"], "Signal": row["signal"], "Wtd Score": round(row["wtd"], 3), "Conviction": row["conviction"], "Z-Score": f"{row['z_score']:.2f}σ", "Ann. Return": f"{row['ann_return']*100:.2f}%", "Sharpe": f"{row['sharpe']:.2f}", "Max Drawdown": f"{row['max_dd']*100:.2f}%", "Lookback": f"{row['lookback']}d", "Date": "✅ Today" if cached_today else f"📅 {display_date}", }) tbl_df = pd.DataFrame(tbl_rows) def _style_sig(val): c = ETF_COLORS.get(val, "#888") return f"background-color:{c}22;color:{c};font-weight:700;" def _style_ret(val): try: v = float(val.replace("%", "")) return "color:#00b894;font-weight:600" if v > 0 else "color:#d63031;font-weight:600" except Exception: return "" styled_tbl = (tbl_df.style .applymap(_style_sig, subset=["Signal"]) .applymap(_style_ret, subset=["Ann. 
Return"]) .set_properties(**{"text-align": "center", "font-size": "14px"}) .set_table_styles([ {"selector": "th", "props": [("font-size", "14px"), ("font-weight", "bold"), ("text-align", "center"), ("background-color", "#1a1a2e"), ("color", "#0066cc")]}, {"selector": "td", "props": [("padding", "10px")]} ])) st.dataframe(styled_tbl, use_container_width=True, height=280) # ── How to read ─────────────────────────────────────────────────────────── st.divider() st.subheader("📖 How to Read These Results") st.markdown(f""" **Why does the signal change by start year?** Each start year defines the market regime the DQN agent trains on. A model trained from 2008 has seen the GFC, 2013 taper tantrum, COVID, and rate hike cycles. A model from 2019 focuses on post-COVID dynamics. The consensus aggregates all regime views. **How is the winner chosen?** Each year's signal is scored: 40% Ann. Return + 20% Z-Score + 20% Sharpe + 20% (–MaxDD), all min-max normalised so no metric dominates. The ETF with the highest total weighted score wins. **Why is the button disabled when today's results exist?** To protect your GitHub Actions minutes (~{n_total * 90} mins per full sweep). The sweep auto-runs at 8pm EST daily so you rarely need to trigger it manually. **Split Signal warning (score share < 40%)** Signals are fragmented — no single ETF dominates across regimes. Treat with caution. **Date stamp** Results filename includes the run date (e.g. `sweep_2019_20260304.json`). The app only shows today's results if available, otherwise yesterday's with a warning banner. Previous day's files are automatically deleted at 8pm EST before the new sweep starts. """)