diff --git a/app.py b/app.py index bc6e5582806fd23c779112cf110ba46101a0e27d..8145c1839d6d05bd22a71a19079075f3734c3513 100644 --- a/app.py +++ b/app.py @@ -8,6 +8,7 @@ P2-ETF-CNN-LSTM-ALTERNATIVE-APPROACHES - Ann. Return compared vs SPY in metrics row - Max Daily DD shows date it occurred - Conviction panel: compact ETF probability list +- [NEW] Multi-Year Sweep tab: runs 8 start years, vote tally + comparison table """ import os @@ -32,6 +33,7 @@ from ui.components import ( show_metrics_row, show_comparison_table, show_audit_trail, show_all_signals_panel, ) +from ui.multiyear import run_multiyear_sweep, show_multiyear_results st.set_page_config(page_title="P2-ETF-CNN-LSTM", page_icon="🧠", layout="wide") @@ -43,6 +45,8 @@ for key, default in [ ("test_dates", None), ("test_slice", None), ("optimal_lookback", None), ("df_for_chart", None), ("tbill_rate", None), ("target_etfs", None), ("from_cache", False), + # Multi-year sweep state + ("multiyear_ready", False), ("multiyear_results", None), ]: if key not in st.session_state: st.session_state[key] = default @@ -68,7 +72,6 @@ with st.sidebar: # ── Title ───────────────────────────────────────────────────────────────────── st.title("🧠 P2-ETF-CNN-LSTM") st.caption("Approach 1: Wavelet Β· Approach 2: Regime-Conditioned Β· Approach 3: Multi-Scale Parallel") -st.caption("Winner selected by highest raw annualised return on out-of-sample test set.") if not HF_TOKEN: st.error("❌ HF_TOKEN secret not found.") @@ -83,6 +86,7 @@ if df_raw.empty: freshness = check_data_freshness(df_raw) show_freshness_status(freshness) +last_date_str = str(freshness.get("last_date_in_data", "unknown")) # ── Dataset info sidebar ────────────────────────────────────────────────────── with st.sidebar: @@ -112,7 +116,7 @@ if run_button: st.stop() n_etfs = len(target_etfs) - n_classes = n_etfs # CASH is overlay only β€” model always picks from ETFs + n_classes = n_etfs st.info( f"🎯 **Targets:** {', '.join([t.replace('_Ret','') for t in target_etfs])} Β· 
" @@ -120,7 +124,6 @@ if run_button: f"**T-bill:** {tbill_rate*100:.2f}%" ) - # ── Raw arrays ──────────────────────────────────────────────────────────── X_raw = df[input_features].values.astype(np.float32) y_raw = np.clip(df[target_etfs].values.astype(np.float32), -0.5, 0.5) @@ -133,8 +136,6 @@ if run_button: if mask.any(): y_raw[mask, j] = 0.0 - last_date_str = str(freshness.get("last_date_in_data", "unknown")) - # ── Auto-select lookback ────────────────────────────────────────────────── lb_key = make_cache_key(last_date_str, start_yr, fee_bps, int(epochs), split_option, False, 0) @@ -186,12 +187,11 @@ if run_button: results, trained_info = {}, {} progress = st.progress(0, text="Training Approach 1...") - for approach, train_fn, predict_fn, train_kwargs in [ + for approach, train_fn, predict_fn in [ ("Approach 1", lambda: train_approach1(X_train_s, y_train_l, X_val_s, y_val_l, n_classes=n_classes, epochs=int(epochs)), - lambda m: predict_approach1(m[0], X_test_s), - None), + lambda m: predict_approach1(m[0], X_test_s)), ("Approach 2", lambda: train_approach2(X_train_s, y_train_l, X_val_s, y_val_l, X_flat_all=X_raw, feature_names=input_features, @@ -199,13 +199,11 @@ if run_button: val_size=val_size, n_classes=n_classes, epochs=int(epochs)), lambda m: predict_approach2(m[0], X_test_s, X_raw, m[3], m[2], - lookback, train_size, val_size), - None), + lookback, train_size, val_size)), ("Approach 3", lambda: train_approach3(X_train_s, y_train_l, X_val_s, y_val_l, n_classes=n_classes, epochs=int(epochs)), - lambda m: predict_approach3(m[0], X_test_s), - None), + lambda m: predict_approach3(m[0], X_test_s)), ]: try: model_out = train_fn() @@ -229,7 +227,6 @@ if run_button: "test_dates": list(test_dates), "test_slice": test_slice, }) - # ── Persist to session state ────────────────────────────────────────────── st.session_state.update({ "results": results, "trained_info": trained_info, "test_dates": test_dates, "test_slice": test_slice, @@ -238,68 +235,116 @@ if 
run_button: "output_ready": True, }) -# ── Render (persists across reruns via session_state) ───────────────────────── -if not st.session_state.output_ready: - st.info("πŸ‘ˆ Configure parameters and click **πŸš€ Run All 3 Approaches**.") - st.stop() +# ── TABS ────────────────────────────────────────────────────────────────────── +tab_single, tab_sweep = st.tabs(["πŸ“Š Single-Year Results", "πŸ” Multi-Year Consensus Sweep"]) -results = st.session_state.results -trained_info = st.session_state.trained_info -test_dates = st.session_state.test_dates -test_slice = st.session_state.test_slice -optimal_lookback = st.session_state.optimal_lookback -df = st.session_state.df_for_chart -tbill_rate = st.session_state.tbill_rate -target_etfs = st.session_state.target_etfs +# ══════════════════════════════════════════════════════════════════════════════ +# TAB 1 β€” existing single-year output (unchanged) +# ══════════════════════════════════════════════════════════════════════════════ +with tab_single: + if not st.session_state.output_ready: + st.info("πŸ‘ˆ Configure parameters and click **πŸš€ Run All 3 Approaches**.") + st.stop() -winner_name = select_winner(results) -winner_res = results.get(winner_name) + results = st.session_state.results + trained_info = st.session_state.trained_info + test_dates = st.session_state.test_dates + test_slice = st.session_state.test_slice + optimal_lookback = st.session_state.optimal_lookback + df = st.session_state.df_for_chart + tbill_rate = st.session_state.tbill_rate + target_etfs = st.session_state.target_etfs -if winner_res is None: - st.error("❌ All approaches failed.") - st.stop() + winner_name = select_winner(results) + winner_res = results.get(winner_name) -if st.session_state.from_cache: - st.success("⚑ Showing cached results.") + if winner_res is None: + st.error("❌ All approaches failed.") + st.stop() -next_date = get_next_signal_date() -st.divider() + st.caption("Winner selected by highest raw annualised return on 
out-of-sample test set.") -show_signal_banner(winner_res["next_signal"], next_date, winner_name) + next_date = get_next_signal_date() + st.divider() -winner_proba = trained_info[winner_name]["proba"] -conviction = compute_conviction(winner_proba[-1], target_etfs, include_cash=False) -show_conviction_panel(conviction) + show_signal_banner(winner_res["next_signal"], next_date, winner_name) -st.divider() + winner_proba = trained_info[winner_name]["proba"] + conviction = compute_conviction(winner_proba[-1], target_etfs, include_cash=False) + show_conviction_panel(conviction) -all_signals = { - name: {"signal": res["next_signal"], - "proba": trained_info[name]["proba"][-1], - "is_winner": name == winner_name} - for name, res in results.items() if res is not None -} -show_all_signals_panel(all_signals, target_etfs, False, next_date, optimal_lookback) + st.divider() -st.divider() -st.subheader(f"πŸ“Š {winner_name} β€” Performance Metrics") + all_signals = { + name: {"signal": res["next_signal"], + "proba": trained_info[name]["proba"][-1], + "is_winner": name == winner_name} + for name, res in results.items() if res is not None + } + show_all_signals_panel(all_signals, target_etfs, False, next_date, optimal_lookback) -# Compute SPY annualised return directly from raw returns for metrics comparison -spy_ann = None -if "SPY_Ret" in df.columns: - spy_raw = df["SPY_Ret"].iloc[test_slice].values.copy().astype(float) - spy_raw = spy_raw[~np.isnan(spy_raw)] - spy_raw = np.clip(spy_raw, -0.5, 0.5) - if len(spy_raw) > 5: - spy_cum = np.prod(1 + spy_raw) - spy_ann = float(spy_cum ** (252 / len(spy_raw)) - 1) + st.divider() + st.subheader(f"πŸ“Š {winner_name} β€” Performance Metrics") + + spy_ann = None + if "SPY_Ret" in df.columns: + spy_raw = df["SPY_Ret"].iloc[test_slice].values.copy().astype(float) + spy_raw = spy_raw[~np.isnan(spy_raw)] + spy_raw = np.clip(spy_raw, -0.5, 0.5) + if len(spy_raw) > 5: + spy_cum = np.prod(1 + spy_raw) + spy_ann = float(spy_cum ** (252 / 
len(spy_raw)) - 1) + + show_metrics_row(winner_res, tbill_rate, spy_ann_return=spy_ann) -show_metrics_row(winner_res, tbill_rate, spy_ann_return=spy_ann) + st.divider() + st.subheader("πŸ† Approach Comparison (Winner = Highest Raw Annualised Return)") + show_comparison_table(build_comparison_table(results, winner_name)) -st.divider() -st.subheader("πŸ† Approach Comparison (Winner = Highest Raw Annualised Return)") -show_comparison_table(build_comparison_table(results, winner_name)) + st.divider() + st.subheader(f"πŸ“‹ Audit Trail β€” {winner_name} (Last 20 Trading Days)") + show_audit_trail(winner_res["audit_trail"]) + + +# ══════════════════════════════════════════════════════════════════════════════ +# TAB 2 β€” Multi-Year Consensus Sweep +# ══════════════════════════════════════════════════════════════════════════════ +with tab_sweep: + st.subheader("πŸ” Multi-Year Consensus Sweep") + st.markdown( + "Runs the winner model (Approach 2 proxy) across **8 start years** " + "and aggregates signals into a consensus vote. " + "Each year uses the same fee, epochs, and split settings as the sidebar. " + "Results are cached β€” only untrained years incur compute." 
+ ) -st.divider() -st.subheader(f"πŸ“‹ Audit Trail β€” {winner_name} (Last 20 Trading Days)") -show_audit_trail(winner_res["audit_trail"]) + SWEEP_YEARS = [2010, 2012, 2014, 2016, 2018, 2019, 2021, 2023] + + col_l, col_r = st.columns([2, 1]) + with col_l: + st.caption(f"Sweep years: {', '.join(str(y) for y in SWEEP_YEARS)}") + with col_r: + sweep_button = st.button("πŸš€ Run Consensus Sweep", type="primary", use_container_width=True) + + if sweep_button: + st.session_state.multiyear_ready = False + sweep_results = run_multiyear_sweep( + df_raw = df_raw, + sweep_years = SWEEP_YEARS, + fee_bps = fee_bps, + epochs = int(epochs), + split_option = split_option, + last_date_str = last_date_str, + train_pct = train_pct, + val_pct = val_pct, + ) + st.session_state.multiyear_results = sweep_results + st.session_state.multiyear_ready = True + + if st.session_state.multiyear_ready and st.session_state.multiyear_results: + show_multiyear_results( + st.session_state.multiyear_results, + sweep_years = SWEEP_YEARS, + ) + elif not st.session_state.multiyear_ready: + st.info("Click **πŸš€ Run Consensus Sweep** to analyse all start years at once.") diff --git a/hf_space/data/loader.py b/hf_space/data/loader.py index 0f9fef47d7abc5d77c115965ba7fccaa081841a8..144589aa5bdeeb4844546502ccc88faf3dcb764f 100644 --- a/hf_space/data/loader.py +++ b/hf_space/data/loader.py @@ -4,14 +4,12 @@ Loads master_data.parquet from HF Dataset. Engineers rich feature set from raw price/macro columns. No external pings β€” all data from HF Dataset only. 
""" - import pandas as pd import numpy as np import streamlit as st from huggingface_hub import hf_hub_download from datetime import datetime, timedelta import pytz - try: import pandas_market_calendars as mcal NYSE_CAL_AVAILABLE = True @@ -20,14 +18,12 @@ except ImportError: DATASET_REPO = "P2SAMAPA/fi-etf-macro-signal-master-data" PARQUET_FILE = "master_data.parquet" -TARGET_ETF_COLS = ["TLT", "TBT", "VNQ", "SLV", "GLD"] +TARGET_ETF_COLS = ["TLT", "VNQ", "SLV", "GLD", "LQD", "HYG", "VCIT"] BENCHMARK_COLS = ["SPY", "AGG"] TBILL_COL = "TBILL_3M" MACRO_COLS = ["VIX", "DXY", "T10Y2Y", "IG_SPREAD", "HY_SPREAD"] - # ── NYSE calendar ───────────────────────────────────────────────────────────── - def get_last_nyse_trading_day(as_of=None): est = pytz.timezone("US/Eastern") if as_of is None: @@ -46,9 +42,7 @@ def get_last_nyse_trading_day(as_of=None): candidate -= timedelta(days=1) return candidate - # ── Data loading ────────────────────────────────────────────────────────────── - @st.cache_data(ttl=3600, show_spinner=False) def load_dataset(hf_token: str) -> pd.DataFrame: try: @@ -64,15 +58,13 @@ def load_dataset(hf_token: str) -> pd.DataFrame: if col in df.columns: df = df.set_index(col) break - df.index = pd.to_datetime(df.index) + df.index = pd.to_datetime(df.index) return df.sort_index() except Exception as e: st.error(f"❌ Failed to load dataset: {e}") return pd.DataFrame() - # ── Freshness check ─────────────────────────────────────────────────────────── - def check_data_freshness(df: pd.DataFrame) -> dict: if df.empty: return {"fresh": False, "message": "Dataset is empty."} @@ -80,16 +72,14 @@ def check_data_freshness(df: pd.DataFrame) -> dict: expect = get_last_nyse_trading_day() fresh = last >= expect msg = ( - f"βœ… Dataset up to date through **{last}**." if fresh else - f"⚠️ **{expect}** data not yet updated. Latest: **{last}**. " + f"βœ… Dataset up to date through {last}." if fresh else + f"⚠️ {expect} data not yet updated. Latest: {last}. 
" f"Dataset updates daily after market close." ) return {"fresh": fresh, "last_date_in_data": last, "expected_date": expect, "message": msg} - # ── Price β†’ returns ─────────────────────────────────────────────────────────── - def _to_returns(series: pd.Series) -> pd.Series: """Convert price series to daily pct returns. If already returns, pass through.""" clean = series.dropna() @@ -99,13 +89,10 @@ def _to_returns(series: pd.Series) -> pd.Series: return series.pct_change() return series # already returns - # ── Feature engineering ─────────────────────────────────────────────────────── - def _engineer_features(df: pd.DataFrame, ret_cols: list) -> pd.DataFrame: """ Build a rich feature set from raw macro + ETF return columns. - Features added per ETF return: - 1d, 5d, 21d lagged returns - 5d, 21d rolling volatility @@ -120,7 +107,7 @@ def _engineer_features(df: pd.DataFrame, ret_cols: list) -> pd.DataFrame: - TBILL_3M as a feature (rate level) - VIX regime flag (VIX > 25) - Yield curve slope (already T10Y2Y) - - Cross-asset momentum: spread between TLT_ret and TBT_ret + - Cross-asset momentum: spread between TLT_ret and AGG_ret """ feat = pd.DataFrame(index=df.index) @@ -154,9 +141,9 @@ def _engineer_features(df: pd.DataFrame, ret_cols: list) -> pd.DataFrame: feat["TBILL_chg5"] = tbill.diff(5) # ── Derived cross-asset signals ─────────────────────────────────────────── - if "TLT_Ret" in df.columns and "TBT_Ret" in df.columns: - feat["TLT_TBT_spread_mom5"] = ( - df["TLT_Ret"].rolling(5).sum() - df["TBT_Ret"].rolling(5).sum() + if "TLT_Ret" in df.columns and "AGG_Ret" in df.columns: + feat["TLT_AGG_spread_mom5"] = ( + df["TLT_Ret"].rolling(5).sum() - df["AGG_Ret"].rolling(5).sum() ) if "VIX" in df.columns: @@ -171,13 +158,10 @@ def _engineer_features(df: pd.DataFrame, ret_cols: list) -> pd.DataFrame: return feat - # ── Main extraction function ────────────────────────────────────────────────── - def get_features_and_targets(df: pd.DataFrame): """ Build return 
columns for target ETFs and engineer a rich feature set. - Returns: input_features : list[str] target_etfs : list[str] e.g. ["TLT_Ret", ...] @@ -240,9 +224,7 @@ def get_features_and_targets(df: pd.DataFrame): return input_features, target_etfs, tbill_rate, df, col_info - # ── Dataset summary ─────────────────────────────────────────────────────────── - def dataset_summary(df: pd.DataFrame) -> dict: if df.empty: return {} diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py index b944ad0559ead182d0413386f9c49985e5059d66..bc6e5582806fd23c779112cf110ba46101a0e27d 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py @@ -32,7 +32,6 @@ from ui.components import ( show_metrics_row, show_comparison_table, show_audit_trail, show_all_signals_panel, ) -from ui.charts import equity_curve_chart st.set_page_config(page_title="P2-ETF-CNN-LSTM", page_icon="🧠", layout="wide") @@ -169,7 +168,7 @@ if run_button: st.success("⚑ Results loaded from cache β€” no retraining needed.") else: X_seq, y_seq = build_sequences(X_raw, y_raw, lookback) - y_labels = returns_to_labels(y_seq, include_cash=False) + y_labels = returns_to_labels(y_seq) (X_train, y_train_r, X_val, y_val_r, X_test, y_test_r) = train_val_test_split(X_seq, y_seq, train_pct, val_pct) @@ -285,18 +284,22 @@ show_all_signals_panel(all_signals, target_etfs, False, next_date, optimal_lookb st.divider() st.subheader(f"πŸ“Š {winner_name} β€” Performance Metrics") -# Build equity curve first to get spy_ann for metrics comparison -fig, spy_ann = equity_curve_chart(results, winner_name, test_dates, df, test_slice, tbill_rate) +# Compute SPY annualised return directly from raw returns for metrics comparison +spy_ann = None +if "SPY_Ret" in df.columns: + spy_raw = df["SPY_Ret"].iloc[test_slice].values.copy().astype(float) + spy_raw = spy_raw[~np.isnan(spy_raw)] + 
spy_raw = np.clip(spy_raw, -0.5, 0.5) + if len(spy_raw) > 5: + spy_cum = np.prod(1 + spy_raw) + spy_ann = float(spy_cum ** (252 / len(spy_raw)) - 1) + show_metrics_row(winner_res, tbill_rate, spy_ann_return=spy_ann) st.divider() st.subheader("πŸ† Approach Comparison (Winner = Highest Raw Annualised Return)") show_comparison_table(build_comparison_table(results, winner_name)) -st.divider() -st.subheader(f"πŸ“ˆ {winner_name} vs SPY & AGG β€” Out-of-Sample") -st.plotly_chart(fig, use_container_width=True) - st.divider() st.subheader(f"πŸ“‹ Audit Trail β€” {winner_name} (Last 20 Trading Days)") show_audit_trail(winner_res["audit_trail"]) diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py index 48a79bdac6d956eecd66f6930db41223bdf743ef..b944ad0559ead182d0413386f9c49985e5059d66 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py @@ -148,8 +148,7 @@ if run_button: with st.spinner("πŸ” Auto-selecting optimal lookback (30 / 45 / 60d)..."): optimal_lookback = find_best_lookback( X_raw, y_raw, - lambda y: returns_to_labels(y, include_cash=False), - train_pct, val_pct, n_classes, False, + train_pct, val_pct, n_classes, candidates=[30, 45, 60], ) save_cache(f"lb_{lb_key}", {"optimal_lookback": optimal_lookback}) diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py index 6b05505276c6be72f7860e645761efee6a3b9bf5..48a79bdac6d956eecd66f6930db41223bdf743ef 100644 --- 
a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py @@ -1,7 +1,13 @@ """ app.py P2-ETF-CNN-LSTM-ALTERNATIVE-APPROACHES -Streamlit orchestrator β€” UI wiring only, no business logic here. +- Session state persistence (results don't vanish on rerun) +- Model caching keyed by data date + config params +- Auto-lookback (30/45/60d) +- CASH is a drawdown risk overlay (not a model class) +- Ann. Return compared vs SPY in metrics row +- Max Daily DD shows date it occurred +- Conviction panel: compact ETF probability list """ import os @@ -32,11 +38,20 @@ st.set_page_config(page_title="P2-ETF-CNN-LSTM", page_icon="🧠", layout="wide" HF_TOKEN = os.getenv("HF_TOKEN", "") +# ── Session state init ──────────────────────────────────────────────────────── +for key, default in [ + ("output_ready", False), ("results", None), ("trained_info", None), + ("test_dates", None), ("test_slice", None), ("optimal_lookback", None), + ("df_for_chart", None), ("tbill_rate", None), ("target_etfs", None), + ("from_cache", False), +]: + if key not in st.session_state: + st.session_state[key] = default + # ── Sidebar ─────────────────────────────────────────────────────────────────── with st.sidebar: st.header("βš™οΈ Configuration") - now_est = get_est_time() - st.write(f"πŸ•’ **EST:** {now_est.strftime('%H:%M:%S')}") + st.write(f"πŸ•’ **EST:** {get_est_time().strftime('%H:%M:%S')}") st.divider() start_yr = st.slider("πŸ“… Start Year", 2010, 2024, 2016) @@ -47,9 +62,7 @@ with st.sidebar: split_option = st.selectbox("πŸ“Š Train/Val/Test Split", ["70/15/15", "80/10/10"], index=0) train_pct, val_pct = {"70/15/15": (0.70, 0.15), "80/10/10": (0.80, 0.10)}[split_option] - include_cash = st.checkbox("πŸ’΅ Include CASH class", value=True, - help="Model 
can select CASH (earns T-bill rate) instead of any ETF") - + st.caption("πŸ’‘ CASH triggered automatically on 2-day drawdown ≀ βˆ’15%") st.divider() run_button = st.button("πŸš€ Run All 3 Approaches", type="primary", use_container_width=True) @@ -85,226 +98,206 @@ with st.sidebar: st.write(f"**Macro:** {', '.join(summary['macro_found'])}") st.write(f"**T-bill col:** {'βœ…' if summary['tbill_found'] else '❌'}") -if not run_button: - st.info("πŸ‘ˆ Configure parameters and click **πŸš€ Run All 3 Approaches**.") - st.stop() - -# ── Filter by start year ────────────────────────────────────────────────────── -df = df_raw[df_raw.index.year >= start_yr].copy() -st.write(f"πŸ“… **Data:** {df.index[0].strftime('%Y-%m-%d')} β†’ {df.index[-1].strftime('%Y-%m-%d')} " - f"({df.index[-1].year - df.index[0].year + 1} years)") +# ── Run button ──────────────────────────────────────────────────────────────── +if run_button: + st.session_state.output_ready = False -# ── Features & targets ──────────────────────────────────────────────────────── -try: - input_features, target_etfs, tbill_rate, df, _ = get_features_and_targets(df) -except ValueError as e: - st.error(str(e)) - st.stop() + df = df_raw[df_raw.index.year >= start_yr].copy() + st.write(f"πŸ“… **Data:** {df.index[0].strftime('%Y-%m-%d')} β†’ {df.index[-1].strftime('%Y-%m-%d')} " + f"({df.index[-1].year - df.index[0].year + 1} years)") -n_etfs = len(target_etfs) -n_classes = n_etfs + (1 if include_cash else 0) - -st.info( - f"🎯 **Targets:** {', '.join([t.replace('_Ret','') for t in target_etfs])} Β· " - f"**Features:** {len(input_features)} signals Β· " - f"**T-bill:** {tbill_rate*100:.2f}%" -) - -# ── Prepare raw arrays ──────────────────────────────────────────────────────── -X_raw = df[input_features].values.astype(np.float32) -y_raw = df[target_etfs].values.astype(np.float32) - -for j in range(X_raw.shape[1]): - mask = np.isnan(X_raw[:, j]) - if mask.any(): - X_raw[mask, j] = np.nanmean(X_raw[:, j]) -for j in 
range(y_raw.shape[1]): - mask = np.isnan(y_raw[:, j]) - if mask.any(): - y_raw[mask, j] = np.nanmean(y_raw[:, j]) - -# ── Auto-select optimal lookback ────────────────────────────────────────────── -last_date_str = str(freshness.get("last_date_in_data", "unknown")) - -# Check cache for lookback selection too -lb_cache_key = make_cache_key( - last_date_str, start_yr, fee_bps, int(epochs), split_option, include_cash, 0 -) -lb_cached = load_cache(f"lb_{lb_cache_key}") - -if lb_cached is not None: - optimal_lookback = lb_cached["optimal_lookback"] - st.success(f"⚑ Loaded from cache Β· Optimal lookback: **{optimal_lookback}d**") -else: - with st.spinner("πŸ” Finding optimal lookback (30 / 45 / 60d)..."): - def _y_labels_fn(y_seq): - return returns_to_labels(y_seq, include_cash=include_cash) - optimal_lookback = find_best_lookback( - X_raw, y_raw, _y_labels_fn, - train_pct, val_pct, n_classes, include_cash, - candidates=[30, 45, 60], - ) - save_cache(f"lb_{lb_cache_key}", {"optimal_lookback": optimal_lookback}) - st.success(f"πŸ“ Optimal lookback: **{optimal_lookback}d** (auto-selected from 30/45/60)") - -lookback = optimal_lookback - -# ── Check full model cache ──────────────────────────────────────────────────── -cache_key = make_cache_key(last_date_str, start_yr, fee_bps, int(epochs), - split_option, include_cash, lookback) -cached_data = load_cache(cache_key) -from_cache = cached_data is not None - -if from_cache: - results = cached_data["results"] - trained_info = cached_data["trained_info"] - test_dates = pd.DatetimeIndex(cached_data["test_dates"]) - test_slice = cached_data["test_slice"] - st.success("⚑ Results loaded from cache β€” no retraining needed.") -else: - # ── Build sequences ─────────────────────────────────────────────────────── - X_seq, y_seq = build_sequences(X_raw, y_raw, lookback) - y_labels = returns_to_labels(y_seq, include_cash=include_cash) - - (X_train, y_train_r, X_val, y_val_r, - X_test, y_test_r) = train_val_test_split(X_seq, y_seq, 
train_pct, val_pct) - (_, y_train_l, _, y_val_l, - _, _) = train_val_test_split(X_seq, y_labels, train_pct, val_pct) - - X_train_s, X_val_s, X_test_s, _ = scale_features(X_train, X_val, X_test) - - train_size = len(X_train) - val_size = len(X_val) - test_start = lookback + train_size + val_size - test_dates = df.index[test_start: test_start + len(X_test)] - test_slice = slice(test_start, test_start + len(X_test)) - - results = {} - trained_info = {} - progress = st.progress(0, text="Training Approach 1...") - - # ── Approach 1 ──────────────────────────────────────────────────────────── - try: - model1, _, _ = train_approach1( - X_train_s, y_train_l, X_val_s, y_val_l, - n_classes=n_classes, epochs=int(epochs), - ) - preds1, proba1 = predict_approach1(model1, X_test_s) - results["Approach 1"] = execute_strategy( - preds1, proba1, y_test_r, test_dates, - target_etfs, fee_bps, tbill_rate, include_cash, - ) - trained_info["Approach 1"] = {"proba": proba1} - except Exception as e: - st.warning(f"⚠️ Approach 1 failed: {e}") - results["Approach 1"] = None - - progress.progress(33, text="Training Approach 2...") - - # ── Approach 2 ──────────────────────────────────────────────────────────── try: - model2, _, hmm2, regime_cols2 = train_approach2( - X_train_s, y_train_l, X_val_s, y_val_l, - X_flat_all=X_raw, feature_names=input_features, - lookback=lookback, train_size=train_size, val_size=val_size, - n_classes=n_classes, epochs=int(epochs), - ) - preds2, proba2 = predict_approach2( - model2, X_test_s, X_raw, regime_cols2, hmm2, - lookback, train_size, val_size, - ) - results["Approach 2"] = execute_strategy( - preds2, proba2, y_test_r, test_dates, - target_etfs, fee_bps, tbill_rate, include_cash, - ) - trained_info["Approach 2"] = {"proba": proba2} - except Exception as e: - st.warning(f"⚠️ Approach 2 failed: {e}") - results["Approach 2"] = None - - progress.progress(66, text="Training Approach 3...") - - # ── Approach 3 
──────────────────────────────────────────────────────────── - try: - model3, _ = train_approach3( - X_train_s, y_train_l, X_val_s, y_val_l, - n_classes=n_classes, epochs=int(epochs), - ) - preds3, proba3 = predict_approach3(model3, X_test_s) - results["Approach 3"] = execute_strategy( - preds3, proba3, y_test_r, test_dates, - target_etfs, fee_bps, tbill_rate, include_cash, - ) - trained_info["Approach 3"] = {"proba": proba3} - except Exception as e: - st.warning(f"⚠️ Approach 3 failed: {e}") - results["Approach 3"] = None - - progress.progress(100, text="Done!") - progress.empty() - - # ── Save to cache ───────────────────────────────────────────────────────── - save_cache(cache_key, { - "results": results, - "trained_info": trained_info, - "test_dates": list(test_dates), - "test_slice": test_slice, + input_features, target_etfs, tbill_rate, df, _ = get_features_and_targets(df) + except ValueError as e: + st.error(str(e)) + st.stop() + + n_etfs = len(target_etfs) + n_classes = n_etfs # CASH is overlay only β€” model always picks from ETFs + + st.info( + f"🎯 **Targets:** {', '.join([t.replace('_Ret','') for t in target_etfs])} Β· " + f"**Features:** {len(input_features)} signals Β· " + f"**T-bill:** {tbill_rate*100:.2f}%" + ) + + # ── Raw arrays ──────────────────────────────────────────────────────────── + X_raw = df[input_features].values.astype(np.float32) + y_raw = np.clip(df[target_etfs].values.astype(np.float32), -0.5, 0.5) + + for j in range(X_raw.shape[1]): + mask = np.isnan(X_raw[:, j]) + if mask.any(): + X_raw[mask, j] = np.nanmean(X_raw[:, j]) + for j in range(y_raw.shape[1]): + mask = np.isnan(y_raw[:, j]) + if mask.any(): + y_raw[mask, j] = 0.0 + + last_date_str = str(freshness.get("last_date_in_data", "unknown")) + + # ── Auto-select lookback ────────────────────────────────────────────────── + lb_key = make_cache_key(last_date_str, start_yr, fee_bps, int(epochs), + split_option, False, 0) + lb_cached = load_cache(f"lb_{lb_key}") + + if lb_cached is 
not None: + optimal_lookback = lb_cached["optimal_lookback"] + st.success(f"⚑ Cache hit Β· Optimal lookback: **{optimal_lookback}d**") + else: + with st.spinner("πŸ” Auto-selecting optimal lookback (30 / 45 / 60d)..."): + optimal_lookback = find_best_lookback( + X_raw, y_raw, + lambda y: returns_to_labels(y, include_cash=False), + train_pct, val_pct, n_classes, False, + candidates=[30, 45, 60], + ) + save_cache(f"lb_{lb_key}", {"optimal_lookback": optimal_lookback}) + st.success(f"πŸ“ Optimal lookback: **{optimal_lookback}d** (auto-selected from 30/45/60)") + + lookback = optimal_lookback + + # ── Check model cache ───────────────────────────────────────────────────── + cache_key = make_cache_key(last_date_str, start_yr, fee_bps, int(epochs), + split_option, False, lookback) + cached_data = load_cache(cache_key) + + if cached_data is not None: + results = cached_data["results"] + trained_info = cached_data["trained_info"] + test_dates = pd.DatetimeIndex(cached_data["test_dates"]) + test_slice = cached_data["test_slice"] + st.success("⚑ Results loaded from cache β€” no retraining needed.") + else: + X_seq, y_seq = build_sequences(X_raw, y_raw, lookback) + y_labels = returns_to_labels(y_seq, include_cash=False) + + (X_train, y_train_r, X_val, y_val_r, + X_test, y_test_r) = train_val_test_split(X_seq, y_seq, train_pct, val_pct) + (_, y_train_l, _, y_val_l, + _, _) = train_val_test_split(X_seq, y_labels, train_pct, val_pct) + + X_train_s, X_val_s, X_test_s, _ = scale_features(X_train, X_val, X_test) + + train_size = len(X_train) + val_size = len(X_val) + test_start = lookback + train_size + val_size + test_dates = df.index[test_start: test_start + len(X_test)] + test_slice = slice(test_start, test_start + len(X_test)) + + results, trained_info = {}, {} + progress = st.progress(0, text="Training Approach 1...") + + for approach, train_fn, predict_fn, train_kwargs in [ + ("Approach 1", + lambda: train_approach1(X_train_s, y_train_l, X_val_s, y_val_l, + 
n_classes=n_classes, epochs=int(epochs)), + lambda m: predict_approach1(m[0], X_test_s), + None), + ("Approach 2", + lambda: train_approach2(X_train_s, y_train_l, X_val_s, y_val_l, + X_flat_all=X_raw, feature_names=input_features, + lookback=lookback, train_size=train_size, + val_size=val_size, n_classes=n_classes, + epochs=int(epochs)), + lambda m: predict_approach2(m[0], X_test_s, X_raw, m[3], m[2], + lookback, train_size, val_size), + None), + ("Approach 3", + lambda: train_approach3(X_train_s, y_train_l, X_val_s, y_val_l, + n_classes=n_classes, epochs=int(epochs)), + lambda m: predict_approach3(m[0], X_test_s), + None), + ]: + try: + model_out = train_fn() + preds, proba = predict_fn(model_out) + results[approach] = execute_strategy( + preds, proba, y_test_r, test_dates, + target_etfs, fee_bps, tbill_rate, + ) + trained_info[approach] = {"proba": proba} + except Exception as e: + st.warning(f"⚠️ {approach} failed: {e}") + results[approach] = None + + pct = {"Approach 1": 33, "Approach 2": 66, "Approach 3": 100}[approach] + progress.progress(pct, text=f"{approach} done...") + + progress.empty() + + save_cache(cache_key, { + "results": results, "trained_info": trained_info, + "test_dates": list(test_dates), "test_slice": test_slice, + }) + + # ── Persist to session state ────────────────────────────────────────────── + st.session_state.update({ + "results": results, "trained_info": trained_info, + "test_dates": test_dates, "test_slice": test_slice, + "optimal_lookback": optimal_lookback, "df_for_chart": df, + "tbill_rate": tbill_rate, "target_etfs": target_etfs, + "output_ready": True, }) -# ── Select winner ───────────────────────────────────────────────────────────── +# ── Render (persists across reruns via session_state) ───────────────────────── +if not st.session_state.output_ready: + st.info("πŸ‘ˆ Configure parameters and click **πŸš€ Run All 3 Approaches**.") + st.stop() + +results = st.session_state.results +trained_info = st.session_state.trained_info 
+test_dates = st.session_state.test_dates +test_slice = st.session_state.test_slice +optimal_lookback = st.session_state.optimal_lookback +df = st.session_state.df_for_chart +tbill_rate = st.session_state.tbill_rate +target_etfs = st.session_state.target_etfs + winner_name = select_winner(results) winner_res = results.get(winner_name) if winner_res is None: - st.error("❌ All approaches failed. Please check data and configuration.") + st.error("❌ All approaches failed.") st.stop() +if st.session_state.from_cache: + st.success("⚑ Showing cached results.") + next_date = get_next_signal_date() st.divider() -# ── Winner signal banner ────────────────────────────────────────────────────── show_signal_banner(winner_res["next_signal"], next_date, winner_name) -# ── Conviction panel ────────────────────────────────────────────────────────── winner_proba = trained_info[winner_name]["proba"] -conviction = compute_conviction(winner_proba[-1], target_etfs, include_cash) +conviction = compute_conviction(winner_proba[-1], target_etfs, include_cash=False) show_conviction_panel(conviction) st.divider() -# ── All models next day signals ─────────────────────────────────────────────── all_signals = { - name: { - "signal": res["next_signal"], - "proba": trained_info[name]["proba"][-1], - "is_winner": name == winner_name, - } + name: {"signal": res["next_signal"], + "proba": trained_info[name]["proba"][-1], + "is_winner": name == winner_name} for name, res in results.items() if res is not None } -show_all_signals_panel(all_signals, target_etfs, include_cash, next_date, optimal_lookback) +show_all_signals_panel(all_signals, target_etfs, False, next_date, optimal_lookback) st.divider() - -# ── Winner performance metrics ──────────────────────────────────────────────── st.subheader(f"πŸ“Š {winner_name} β€” Performance Metrics") -show_metrics_row(winner_res, tbill_rate) -st.divider() +# Build equity curve first to get spy_ann for metrics comparison +fig, spy_ann = 
equity_curve_chart(results, winner_name, test_dates, df, test_slice, tbill_rate) +show_metrics_row(winner_res, tbill_rate, spy_ann_return=spy_ann) -# ── Comparison table ────────────────────────────────────────────────────────── +st.divider() st.subheader("πŸ† Approach Comparison (Winner = Highest Raw Annualised Return)") -comparison_df = build_comparison_table(results, winner_name) -show_comparison_table(comparison_df) +show_comparison_table(build_comparison_table(results, winner_name)) st.divider() - -# ── Equity curve ────────────────────────────────────────────────────────────── st.subheader(f"πŸ“ˆ {winner_name} vs SPY & AGG β€” Out-of-Sample") -fig = equity_curve_chart(results, winner_name, test_dates, df, test_slice, tbill_rate) st.plotly_chart(fig, use_container_width=True) st.divider() - -# ── Audit trail ─────────────────────────────────────────────────────────────── st.subheader(f"πŸ“‹ Audit Trail β€” {winner_name} (Last 20 Trading Days)") show_audit_trail(winner_res["audit_trail"]) diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py index 41b113eda1b094c1a0f6b40036d922934b8f2def..6b05505276c6be72f7860e645761efee6a3b9bf5 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py @@ -13,7 
+13,9 @@ from data.loader import (load_dataset, check_data_freshness, get_features_and_targets, dataset_summary) from utils.calendar import get_est_time, get_next_signal_date from models.base import (build_sequences, train_val_test_split, - scale_features, returns_to_labels) + scale_features, returns_to_labels, + find_best_lookback, make_cache_key, + save_cache, load_cache) from models.approach1_wavelet import train_approach1, predict_approach1 from models.approach2_regime import train_approach2, predict_approach2 from models.approach3_multiscale import train_approach3, predict_approach3 @@ -39,8 +41,7 @@ with st.sidebar: start_yr = st.slider("πŸ“… Start Year", 2010, 2024, 2016) fee_bps = st.slider("πŸ’° Fee (bps)", 0, 50, 10) - lookback = st.slider("πŸ“ Lookback (days)", 20, 60, 30, step=5) - epochs = st.number_input("πŸ” Max Epochs", 20, 300, 100, step=10) + epochs = st.number_input("πŸ” Max Epochs", 20, 150, 80, step=10) st.divider() split_option = st.selectbox("πŸ“Š Train/Val/Test Split", ["70/15/15", "80/10/10"], index=0) @@ -109,7 +110,7 @@ st.info( f"**T-bill:** {tbill_rate*100:.2f}%" ) -# ── Build sequences ─────────────────────────────────────────────────────────── +# ── Prepare raw arrays ──────────────────────────────────────────────────────── X_raw = df[input_features].values.astype(np.float32) y_raw = df[target_etfs].values.astype(np.float32) @@ -117,39 +118,74 @@ for j in range(X_raw.shape[1]): mask = np.isnan(X_raw[:, j]) if mask.any(): X_raw[mask, j] = np.nanmean(X_raw[:, j]) - for j in range(y_raw.shape[1]): mask = np.isnan(y_raw[:, j]) if mask.any(): y_raw[mask, j] = np.nanmean(y_raw[:, j]) -X_seq, y_seq = build_sequences(X_raw, y_raw, lookback) -y_labels = returns_to_labels(y_seq, include_cash=include_cash) - -(X_train, y_train_r, X_val, y_val_r, - X_test, y_test_r) = train_val_test_split(X_seq, y_seq, train_pct, val_pct) -(_, y_train_l, _, y_val_l, - _, _) = train_val_test_split(X_seq, y_labels, train_pct, val_pct) - -X_train_s, X_val_s, 
X_test_s, _ = scale_features(X_train, X_val, X_test) - -train_size = len(X_train) -val_size = len(X_val) -test_start = lookback + train_size + val_size -test_dates = df.index[test_start: test_start + len(X_test)] -test_slice = slice(test_start, test_start + len(X_test)) - -st.success(f"βœ… Sequences β€” Train: {train_size:,} Β· Val: {val_size:,} Β· Test: {len(X_test):,}") +# ── Auto-select optimal lookback ────────────────────────────────────────────── +last_date_str = str(freshness.get("last_date_in_data", "unknown")) -# ── Train all three approaches ──────────────────────────────────────────────── -results = {} -trained_info = {} -progress = st.progress(0, text="Starting training...") - -# Approach 1 -with st.spinner("🌊 Training Approach 1 β€” Wavelet CNN-LSTM..."): +# Check cache for lookback selection too +lb_cache_key = make_cache_key( + last_date_str, start_yr, fee_bps, int(epochs), split_option, include_cash, 0 +) +lb_cached = load_cache(f"lb_{lb_cache_key}") + +if lb_cached is not None: + optimal_lookback = lb_cached["optimal_lookback"] + st.success(f"⚑ Loaded from cache Β· Optimal lookback: **{optimal_lookback}d**") +else: + with st.spinner("πŸ” Finding optimal lookback (30 / 45 / 60d)..."): + def _y_labels_fn(y_seq): + return returns_to_labels(y_seq, include_cash=include_cash) + optimal_lookback = find_best_lookback( + X_raw, y_raw, _y_labels_fn, + train_pct, val_pct, n_classes, include_cash, + candidates=[30, 45, 60], + ) + save_cache(f"lb_{lb_cache_key}", {"optimal_lookback": optimal_lookback}) + st.success(f"πŸ“ Optimal lookback: **{optimal_lookback}d** (auto-selected from 30/45/60)") + +lookback = optimal_lookback + +# ── Check full model cache ──────────────────────────────────────────────────── +cache_key = make_cache_key(last_date_str, start_yr, fee_bps, int(epochs), + split_option, include_cash, lookback) +cached_data = load_cache(cache_key) +from_cache = cached_data is not None + +if from_cache: + results = cached_data["results"] + trained_info 
= cached_data["trained_info"] + test_dates = pd.DatetimeIndex(cached_data["test_dates"]) + test_slice = cached_data["test_slice"] + st.success("⚑ Results loaded from cache β€” no retraining needed.") +else: + # ── Build sequences ─────────────────────────────────────────────────────── + X_seq, y_seq = build_sequences(X_raw, y_raw, lookback) + y_labels = returns_to_labels(y_seq, include_cash=include_cash) + + (X_train, y_train_r, X_val, y_val_r, + X_test, y_test_r) = train_val_test_split(X_seq, y_seq, train_pct, val_pct) + (_, y_train_l, _, y_val_l, + _, _) = train_val_test_split(X_seq, y_labels, train_pct, val_pct) + + X_train_s, X_val_s, X_test_s, _ = scale_features(X_train, X_val, X_test) + + train_size = len(X_train) + val_size = len(X_val) + test_start = lookback + train_size + val_size + test_dates = df.index[test_start: test_start + len(X_test)] + test_slice = slice(test_start, test_start + len(X_test)) + + results = {} + trained_info = {} + progress = st.progress(0, text="Training Approach 1...") + + # ── Approach 1 ──────────────────────────────────────────────────────────── try: - model1, hist1, _ = train_approach1( + model1, _, _ = train_approach1( X_train_s, y_train_l, X_val_s, y_val_l, n_classes=n_classes, epochs=int(epochs), ) @@ -159,17 +195,15 @@ with st.spinner("🌊 Training Approach 1 β€” Wavelet CNN-LSTM..."): target_etfs, fee_bps, tbill_rate, include_cash, ) trained_info["Approach 1"] = {"proba": proba1} - st.success("βœ… Approach 1 complete") except Exception as e: st.warning(f"⚠️ Approach 1 failed: {e}") results["Approach 1"] = None -progress.progress(33, text="Approach 1 done...") + progress.progress(33, text="Training Approach 2...") -# Approach 2 -with st.spinner("πŸ”€ Training Approach 2 β€” Regime-Conditioned CNN-LSTM..."): + # ── Approach 2 ──────────────────────────────────────────────────────────── try: - model2, hist2, hmm2, regime_cols2 = train_approach2( + model2, _, hmm2, regime_cols2 = train_approach2( X_train_s, y_train_l, X_val_s, 
y_val_l, X_flat_all=X_raw, feature_names=input_features, lookback=lookback, train_size=train_size, val_size=val_size, @@ -184,17 +218,15 @@ with st.spinner("πŸ”€ Training Approach 2 β€” Regime-Conditioned CNN-LSTM..."): target_etfs, fee_bps, tbill_rate, include_cash, ) trained_info["Approach 2"] = {"proba": proba2} - st.success("βœ… Approach 2 complete") except Exception as e: st.warning(f"⚠️ Approach 2 failed: {e}") results["Approach 2"] = None -progress.progress(66, text="Approach 2 done...") + progress.progress(66, text="Training Approach 3...") -# Approach 3 -with st.spinner("πŸ“‘ Training Approach 3 β€” Multi-Scale CNN-LSTM..."): + # ── Approach 3 ──────────────────────────────────────────────────────────── try: - model3, hist3 = train_approach3( + model3, _ = train_approach3( X_train_s, y_train_l, X_val_s, y_val_l, n_classes=n_classes, epochs=int(epochs), ) @@ -204,13 +236,20 @@ with st.spinner("πŸ“‘ Training Approach 3 β€” Multi-Scale CNN-LSTM..."): target_etfs, fee_bps, tbill_rate, include_cash, ) trained_info["Approach 3"] = {"proba": proba3} - st.success("βœ… Approach 3 complete") except Exception as e: st.warning(f"⚠️ Approach 3 failed: {e}") results["Approach 3"] = None -progress.progress(100, text="All approaches complete!") -progress.empty() + progress.progress(100, text="Done!") + progress.empty() + + # ── Save to cache ───────────────────────────────────────────────────────── + save_cache(cache_key, { + "results": results, + "trained_info": trained_info, + "test_dates": list(test_dates), + "test_slice": test_slice, + }) # ── Select winner ───────────────────────────────────────────────────────────── winner_name = select_winner(results) @@ -226,14 +265,14 @@ st.divider() # ── Winner signal banner ────────────────────────────────────────────────────── show_signal_banner(winner_res["next_signal"], next_date, winner_name) -# ── Conviction panel (winner only) ──────────────────────────────────────────── +# ── Conviction panel 
────────────────────────────────────────────────────────── winner_proba = trained_info[winner_name]["proba"] conviction = compute_conviction(winner_proba[-1], target_etfs, include_cash) show_conviction_panel(conviction) st.divider() -# ── All models' next day signals ────────────────────────────────────────────── +# ── All models next day signals ─────────────────────────────────────────────── all_signals = { name: { "signal": res["next_signal"], @@ -242,7 +281,7 @@ all_signals = { } for name, res in results.items() if res is not None } -show_all_signals_panel(all_signals, target_etfs, include_cash, next_date) +show_all_signals_panel(all_signals, target_etfs, include_cash, next_date, optimal_lookback) st.divider() @@ -259,13 +298,13 @@ show_comparison_table(comparison_df) st.divider() -# ── Equity curves ───────────────────────────────────────────────────────────── -st.subheader("πŸ“ˆ Out-of-Sample Equity Curves β€” All Approaches vs Benchmarks") +# ── Equity curve ────────────────────────────────────────────────────────────── +st.subheader(f"πŸ“ˆ {winner_name} vs SPY & AGG β€” Out-of-Sample") fig = equity_curve_chart(results, winner_name, test_dates, df, test_slice, tbill_rate) st.plotly_chart(fig, use_container_width=True) st.divider() -# ── Audit trail (winner) ────────────────────────────────────────────────────── +# ── Audit trail ─────────────────────────────────────────────────────────────── st.subheader(f"πŸ“‹ Audit Trail β€” {winner_name} (Last 20 Trading Days)") show_audit_trail(winner_res["audit_trail"]) diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py 
b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py index d3e04d6d6eaed853e7143f18f88a5fb069c05d86..41b113eda1b094c1a0f6b40036d922934b8f2def 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py @@ -11,7 +11,7 @@ import numpy as np from data.loader import (load_dataset, check_data_freshness, get_features_and_targets, dataset_summary) -from utils.calendar import get_est_time, is_sync_window, get_next_signal_date +from utils.calendar import get_est_time, get_next_signal_date from models.base import (build_sequences, train_val_test_split, scale_features, returns_to_labels) from models.approach1_wavelet import train_approach1, predict_approach1 @@ -22,8 +22,9 @@ from signals.conviction import compute_conviction from ui.components import ( show_freshness_status, show_signal_banner, show_conviction_panel, show_metrics_row, show_comparison_table, show_audit_trail, + show_all_signals_panel, ) -from ui.charts import equity_curve_chart, comparison_bar_chart +from ui.charts import equity_curve_chart st.set_page_config(page_title="P2-ETF-CNN-LSTM", page_icon="🧠", layout="wide") @@ -34,12 +35,8 @@ with st.sidebar: st.header("βš™οΈ Configuration") now_est = get_est_time() st.write(f"πŸ•’ **EST:** {now_est.strftime('%H:%M:%S')}") - if is_sync_window(): - st.success("βœ… Sync Window Active") - else: - st.info("⏸️ Sync Window Inactive") - st.divider() + 
start_yr = st.slider("πŸ“… Start Year", 2010, 2024, 2016) fee_bps = st.slider("πŸ’° Fee (bps)", 0, 50, 10) lookback = st.slider("πŸ“ Lookback (days)", 20, 60, 30, step=5) @@ -87,9 +84,6 @@ with st.sidebar: st.write(f"**Macro:** {', '.join(summary['macro_found'])}") st.write(f"**T-bill col:** {'βœ…' if summary['tbill_found'] else '❌'}") - with st.expander("πŸ” All columns"): - st.write(summary["all_cols"]) - if not run_button: st.info("πŸ‘ˆ Configure parameters and click **πŸš€ Run All 3 Approaches**.") st.stop() @@ -101,7 +95,7 @@ st.write(f"πŸ“… **Data:** {df.index[0].strftime('%Y-%m-%d')} β†’ {df.index[-1].s # ── Features & targets ──────────────────────────────────────────────────────── try: - input_features, target_etfs, tbill_rate, df, col_info = get_features_and_targets(df) + input_features, target_etfs, tbill_rate, df, _ = get_features_and_targets(df) except ValueError as e: st.error(str(e)) st.stop() @@ -109,18 +103,6 @@ except ValueError as e: n_etfs = len(target_etfs) n_classes = n_etfs + (1 if include_cash else 0) -# ── Show column detection diagnostics ──────────────────────────────────────── -with st.expander("πŸ”¬ Column detection diagnostics", expanded=False): - st.write("**How each ETF column was interpreted:**") - for col, info in col_info.items(): - st.write(f"- `{col}`: {info}") - st.write(f"**Input features ({len(input_features)}):** {input_features}") - st.write(f"**T-bill rate used:** {tbill_rate*100:.3f}%") - - # Show sample return values to verify correctness - st.write("**Sample target return values (last 3 rows):**") - st.dataframe(df[target_etfs].tail(3)) - st.info( f"🎯 **Targets:** {', '.join([t.replace('_Ret','') for t in target_etfs])} Β· " f"**Features:** {len(input_features)} signals Β· " @@ -131,19 +113,15 @@ st.info( X_raw = df[input_features].values.astype(np.float32) y_raw = df[target_etfs].values.astype(np.float32) -# Fill NaNs -col_means = np.nanmean(X_raw, axis=0) for j in range(X_raw.shape[1]): mask = np.isnan(X_raw[:, j]) 
if mask.any(): - X_raw[mask, j] = col_means[j] + X_raw[mask, j] = np.nanmean(X_raw[:, j]) -# Also fill NaNs in y_raw -y_means = np.nanmean(y_raw, axis=0) for j in range(y_raw.shape[1]): mask = np.isnan(y_raw[:, j]) if mask.any(): - y_raw[mask, j] = y_means[j] + y_raw[mask, j] = np.nanmean(y_raw[:, j]) X_seq, y_seq = build_sequences(X_raw, y_raw, lookback) y_labels = returns_to_labels(y_seq, include_cash=include_cash) @@ -151,7 +129,7 @@ y_labels = returns_to_labels(y_seq, include_cash=include_cash) (X_train, y_train_r, X_val, y_val_r, X_test, y_test_r) = train_val_test_split(X_seq, y_seq, train_pct, val_pct) (_, y_train_l, _, y_val_l, - _, y_test_l) = train_val_test_split(X_seq, y_labels, train_pct, val_pct) + _, _) = train_val_test_split(X_seq, y_labels, train_pct, val_pct) X_train_s, X_val_s, X_test_s, _ = scale_features(X_train, X_val, X_test) @@ -163,14 +141,6 @@ test_slice = slice(test_start, test_start + len(X_test)) st.success(f"βœ… Sequences β€” Train: {train_size:,} Β· Val: {val_size:,} Β· Test: {len(X_test):,}") -# Show class distribution to check for degenerate labels -with st.expander("πŸ”¬ Label distribution (train set)", expanded=False): - unique, counts = np.unique(y_train_l, return_counts=True) - label_names = [target_etfs[i].replace("_Ret","") if i < n_etfs else "CASH" for i in unique] - dist_df = pd.DataFrame({"Class": label_names, "Count": counts, - "Pct": (counts / counts.sum() * 100).round(1)}) - st.dataframe(dist_df) - # ── Train all three approaches ──────────────────────────────────────────────── results = {} trained_info = {} @@ -253,27 +223,49 @@ if winner_res is None: next_date = get_next_signal_date() st.divider() +# ── Winner signal banner ────────────────────────────────────────────────────── show_signal_banner(winner_res["next_signal"], next_date, winner_name) +# ── Conviction panel (winner only) ──────────────────────────────────────────── winner_proba = trained_info[winner_name]["proba"] conviction = 
compute_conviction(winner_proba[-1], target_etfs, include_cash) show_conviction_panel(conviction) st.divider() + +# ── All models' next day signals ────────────────────────────────────────────── +all_signals = { + name: { + "signal": res["next_signal"], + "proba": trained_info[name]["proba"][-1], + "is_winner": name == winner_name, + } + for name, res in results.items() if res is not None +} +show_all_signals_panel(all_signals, target_etfs, include_cash, next_date) + +st.divider() + +# ── Winner performance metrics ──────────────────────────────────────────────── st.subheader(f"πŸ“Š {winner_name} β€” Performance Metrics") show_metrics_row(winner_res, tbill_rate) st.divider() + +# ── Comparison table ────────────────────────────────────────────────────────── st.subheader("πŸ† Approach Comparison (Winner = Highest Raw Annualised Return)") comparison_df = build_comparison_table(results, winner_name) show_comparison_table(comparison_df) -st.plotly_chart(comparison_bar_chart(results, winner_name), use_container_width=True) st.divider() + +# ── Equity curves ───────────────────────────────────────────────────────────── st.subheader("πŸ“ˆ Out-of-Sample Equity Curves β€” All Approaches vs Benchmarks") fig = equity_curve_chart(results, winner_name, test_dates, df, test_slice, tbill_rate) st.plotly_chart(fig, use_container_width=True) st.divider() + +# ── Audit trail (winner) ────────────────────────────────────────────────────── st.subheader(f"πŸ“‹ Audit Trail β€” {winner_name} (Last 20 Trading Days)") show_audit_trail(winner_res["audit_trail"]) diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/data/loader.py 
b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/data/loader.py index f7484b3e44c030816ad8f2d236eeedf500b4b341..0f9fef47d7abc5d77c115965ba7fccaa081841a8 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/data/loader.py +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/data/loader.py @@ -1,12 +1,8 @@ """ data/loader.py Loads master_data.parquet from HF Dataset. -Validates freshness against the last NYSE trading day. -No external pings β€” all data comes from HF Dataset only. - -Actual dataset columns (confirmed from parquet inspection): - ETFs : AGG, GLD, SLV, SPY, TBT, TLT, VNQ - Macro : VIX, DXY, T10Y2Y, TBILL_3M, IG_SPREAD, HY_SPREAD +Engineers rich feature set from raw price/macro columns. +No external pings β€” all data from HF Dataset only. 
""" import pandas as pd @@ -22,9 +18,8 @@ try: except ImportError: NYSE_CAL_AVAILABLE = False -DATASET_REPO = "P2SAMAPA/fi-etf-macro-signal-master-data" -PARQUET_FILE = "master_data.parquet" - +DATASET_REPO = "P2SAMAPA/fi-etf-macro-signal-master-data" +PARQUET_FILE = "master_data.parquet" TARGET_ETF_COLS = ["TLT", "TBT", "VNQ", "SLV", "GLD"] BENCHMARK_COLS = ["SPY", "AGG"] TBILL_COL = "TBILL_3M" @@ -64,16 +59,13 @@ def load_dataset(hf_token: str) -> pd.DataFrame: token=hf_token, ) df = pd.read_parquet(path) - if not isinstance(df.index, pd.DatetimeIndex): for col in ["Date", "date", "DATE"]: if col in df.columns: df = df.set_index(col) break df.index = pd.to_datetime(df.index) - return df.sort_index() - except Exception as e: st.error(f"❌ Failed to load dataset: {e}") return pd.DataFrame() @@ -84,11 +76,9 @@ def load_dataset(hf_token: str) -> pd.DataFrame: def check_data_freshness(df: pd.DataFrame) -> dict: if df.empty: return {"fresh": False, "message": "Dataset is empty."} - last = df.index[-1].date() expect = get_last_nyse_trading_day() fresh = last >= expect - msg = ( f"βœ… Dataset up to date through **{last}**." if fresh else f"⚠️ **{expect}** data not yet updated. Latest: **{last}**. " @@ -98,106 +88,139 @@ def check_data_freshness(df: pd.DataFrame) -> dict: "expected_date": expect, "message": msg} -# ── Detect whether a column holds prices or returns ─────────────────────────── +# ── Price β†’ returns ─────────────────────────────────────────────────────────── -def _is_price_series(series: pd.Series) -> bool: - """ - Heuristic: a price series has abs(median) > 2 and std/mean < 0.5. - A return series has abs(median) < 0.1 and many values near zero. - """ +def _to_returns(series: pd.Series) -> pd.Series: + """Convert price series to daily pct returns. If already returns, pass through.""" clean = series.dropna() if len(clean) == 0: - return False - med = abs(clean.median()) - # Strong price signal: median > 2 (e.g. 
TLT ~ 90, TBT ~ 20) - if med > 2: - return True - # Strong return signal: most values between -0.2 and 0.2 - if (clean.abs() < 0.2).mean() > 0.9: - return False - return med > 0.5 + return series + if abs(clean.median()) > 2: # price series + return series.pct_change() + return series # already returns + + +# ── Feature engineering ─────────────────────────────────────────────────────── + +def _engineer_features(df: pd.DataFrame, ret_cols: list) -> pd.DataFrame: + """ + Build a rich feature set from raw macro + ETF return columns. + + Features added per ETF return: + - 1d, 5d, 21d lagged returns + - 5d, 21d rolling volatility + - 5d, 21d momentum (cumulative return) + + Features added per macro column: + - raw value (z-scored over rolling 252d window) + - 5d change + - 1d lag + + Also adds: + - TBILL_3M as a feature (rate level) + - VIX regime flag (VIX > 25) + - Yield curve slope (already T10Y2Y) + - Cross-asset momentum: spread between TLT_ret and TBT_ret + """ + feat = pd.DataFrame(index=df.index) + + # ── ETF return features ─────────────────────────────────────────────────── + for col in ret_cols: + r = df[col] + feat[f"{col}_lag1"] = r.shift(1) + feat[f"{col}_lag5"] = r.shift(5) + feat[f"{col}_lag21"] = r.shift(21) + feat[f"{col}_vol5"] = r.rolling(5).std() + feat[f"{col}_vol21"] = r.rolling(21).std() + feat[f"{col}_mom5"] = r.rolling(5).sum() + feat[f"{col}_mom21"] = r.rolling(21).sum() + + # ── Macro features ──────────────────────────────────────────────────────── + for col in MACRO_COLS: + if col not in df.columns: + continue + s = df[col] + # Z-score over rolling 252-day window + roll_mean = s.rolling(252, min_periods=63).mean() + roll_std = s.rolling(252, min_periods=63).std() + feat[f"{col}_z"] = (s - roll_mean) / (roll_std + 1e-9) + feat[f"{col}_chg5"] = s.diff(5) + feat[f"{col}_lag1"] = s.shift(1) + + # ── TBILL level ─────────────────────────────────────────────────────────── + if TBILL_COL in df.columns: + tbill = df[TBILL_COL] + 
feat["TBILL_level"] = tbill + feat["TBILL_chg5"] = tbill.diff(5) + + # ── Derived cross-asset signals ─────────────────────────────────────────── + if "TLT_Ret" in df.columns and "TBT_Ret" in df.columns: + feat["TLT_TBT_spread_mom5"] = ( + df["TLT_Ret"].rolling(5).sum() - df["TBT_Ret"].rolling(5).sum() + ) + + if "VIX" in df.columns: + feat["VIX_regime"] = (df["VIX"] > 25).astype(float) + feat["VIX_mom5"] = df["VIX"].diff(5) + + if "T10Y2Y" in df.columns: + feat["YC_inverted"] = (df["T10Y2Y"] < 0).astype(float) + if "IG_SPREAD" in df.columns and "HY_SPREAD" in df.columns: + feat["credit_ratio"] = df["HY_SPREAD"] / (df["IG_SPREAD"] + 1e-9) -# ── Feature / target extraction ─────────────────────────────────────────────── + return feat + + +# ── Main extraction function ────────────────────────────────────────────────── def get_features_and_targets(df: pd.DataFrame): """ - Build return columns for target ETFs and benchmarks. - Auto-detects whether source columns are prices or already returns. + Build return columns for target ETFs and engineer a rich feature set. Returns: input_features : list[str] target_etfs : list[str] e.g. ["TLT_Ret", ...] tbill_rate : float - df : DataFrame with _Ret columns added - col_info : dict of diagnostics for sidebar display + df_out : DataFrame with all columns + col_info : dict of diagnostics """ missing = [c for c in TARGET_ETF_COLS if c not in df.columns] if missing: raise ValueError( f"Missing ETF columns: {missing}. 
" - f"Found in dataset: {list(df.columns)}" + f"Found: {list(df.columns)}" ) col_info = {} - # ── Build _Ret columns ──────────────────────────────────────────────────── - def make_ret(col): + # ── Build ETF return columns ────────────────────────────────────────────── + target_etfs = [] + for col in TARGET_ETF_COLS: ret_col = f"{col}_Ret" - if ret_col in df.columns: - col_info[col] = "pre-computed _Ret" - return ret_col - if _is_price_series(df[col]): - df[ret_col] = df[col].pct_change() - col_info[col] = f"priceβ†’pct_change (median={df[col].median():.2f})" - else: - df[ret_col] = df[col] - col_info[col] = f"used as-is (median={df[col].median():.4f})" - return ret_col - - target_etfs = [make_ret(c) for c in TARGET_ETF_COLS] - benchmark_rets = [make_ret(c) for c in BENCHMARK_COLS if c in df.columns] - - # Drop NaN rows (first row from pct_change) + df[ret_col] = _to_returns(df[col]) + med = abs(df[col].dropna().median()) + col_info[col] = f"priceβ†’pct_change (median={med:.2f})" if med > 2 else f"used as-is (median={med:.4f})" + target_etfs.append(ret_col) + + # ── Build benchmark return columns ──────────────────────────────────────── + for col in BENCHMARK_COLS: + if col in df.columns: + df[f"{col}_Ret"] = _to_returns(df[col]) + + # ── Drop NaN from first pct_change row ──────────────────────────────────── df = df.dropna(subset=target_etfs).copy() - # Sanity check: target returns should be small daily values - for ret_col in target_etfs: - med = df[ret_col].abs().median() - if med > 0.1: - st.warning( - f"⚠️ {ret_col} has median absolute value {med:.4f} β€” " - f"these may not be daily returns. Check dataset column '{ret_col.replace('_Ret','')}'. 
" - f"Sample values: {df[ret_col].tail(3).values}" - ) - - # ── Input features ──────────────────────────────────────────────────────── - exclude = set( - TARGET_ETF_COLS + BENCHMARK_COLS + target_etfs + benchmark_rets + - [f"{c}_Ret" for c in BENCHMARK_COLS] + [TBILL_COL] - ) + # ── Engineer features ───────────────────────────────────────────────────── + feat_df = _engineer_features(df, target_etfs) + + # Merge features into df + for col in feat_df.columns: + df[col] = feat_df[col].values - # First try known macro columns - input_features = [c for c in MACRO_COLS if c in df.columns and c not in exclude] - - # Then add any engineered signal columns - extra = [ - c for c in df.columns - if c not in exclude - and c not in input_features - and any(k in c for k in ["_Z", "_Vol", "Regime", "YC_", "Credit_", - "Rates_", "VIX_", "Spread", "DXY", "T10Y", - "TBILL", "SOFR", "MOVE"]) - and pd.api.types.is_numeric_dtype(df[c]) - ] - input_features += extra - - # Fallback: all numeric non-excluded columns - if not input_features: - input_features = [ - c for c in df.columns - if c not in exclude and pd.api.types.is_numeric_dtype(df[c]) - ] + # Drop rows with NaN in features (from lags/rolling) + feat_cols = list(feat_df.columns) + df = df.dropna(subset=feat_cols).copy() # ── T-bill rate ─────────────────────────────────────────────────────────── tbill_rate = 0.045 @@ -207,6 +230,14 @@ def get_features_and_targets(df: pd.DataFrame): v = float(raw.iloc[-1]) tbill_rate = v / 100 if v > 1 else v + # Input features = all engineered feature columns + exclude = set( + TARGET_ETF_COLS + BENCHMARK_COLS + target_etfs + + [f"{c}_Ret" for c in BENCHMARK_COLS] + [TBILL_COL] + + list(MACRO_COLS) + ) + input_features = [c for c in feat_cols if c not in exclude] + return input_features, target_etfs, tbill_rate, df, col_info diff --git 
a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py index 97ceca9077e0805a4127a483233c87465d782c6e..d3e04d6d6eaed853e7143f18f88a5fb069c05d86 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py @@ -9,7 +9,6 @@ import streamlit as st import pandas as pd import numpy as np -# ── Module imports ──────────────────────────────────────────────────────────── from data.loader import (load_dataset, check_data_freshness, get_features_and_targets, dataset_summary) from utils.calendar import get_est_time, is_sync_window, get_next_signal_date @@ -26,20 +25,13 @@ from ui.components import ( ) from ui.charts import equity_curve_chart, comparison_bar_chart -# ── Page config ─────────────────────────────────────────────────────────────── -st.set_page_config( - page_title="P2-ETF-CNN-LSTM", - page_icon="🧠", - layout="wide", -) 
+st.set_page_config(page_title="P2-ETF-CNN-LSTM", page_icon="🧠", layout="wide") -# ── Secrets ─────────────────────────────────────────────────────────────────── HF_TOKEN = os.getenv("HF_TOKEN", "") # ── Sidebar ─────────────────────────────────────────────────────────────────── with st.sidebar: st.header("βš™οΈ Configuration") - now_est = get_est_time() st.write(f"πŸ•’ **EST:** {now_est.strftime('%H:%M:%S')}") if is_sync_window(): @@ -48,25 +40,19 @@ with st.sidebar: st.info("⏸️ Sync Window Inactive") st.divider() - start_yr = st.slider("πŸ“… Start Year", 2010, 2024, 2016) fee_bps = st.slider("πŸ’° Fee (bps)", 0, 50, 10) lookback = st.slider("πŸ“ Lookback (days)", 20, 60, 30, step=5) epochs = st.number_input("πŸ” Max Epochs", 20, 300, 100, step=10) st.divider() - split_option = st.selectbox("πŸ“Š Train/Val/Test Split", ["70/15/15", "80/10/10"], index=0) - split_map = {"70/15/15": (0.70, 0.15), "80/10/10": (0.80, 0.10)} - train_pct, val_pct = split_map[split_option] + train_pct, val_pct = {"70/15/15": (0.70, 0.15), "80/10/10": (0.80, 0.10)}[split_option] - include_cash = st.checkbox( - "πŸ’΅ Include CASH class", value=True, - help="Model can select CASH (earns T-bill rate) instead of any ETF", - ) + include_cash = st.checkbox("πŸ’΅ Include CASH class", value=True, + help="Model can select CASH (earns T-bill rate) instead of any ETF") st.divider() - run_button = st.button("πŸš€ Run All 3 Approaches", type="primary", use_container_width=True) # ── Title ───────────────────────────────────────────────────────────────────── @@ -74,9 +60,8 @@ st.title("🧠 P2-ETF-CNN-LSTM") st.caption("Approach 1: Wavelet Β· Approach 2: Regime-Conditioned Β· Approach 3: Multi-Scale Parallel") st.caption("Winner selected by highest raw annualised return on out-of-sample test set.") -# ── Token check ─────────────────────────────────────────────────────────────── if not HF_TOKEN: - st.error("❌ HF_TOKEN secret not found. 
Add it to HF Space / GitHub secrets.") + st.error("❌ HF_TOKEN secret not found.") st.stop() # ── Load dataset ────────────────────────────────────────────────────────────── @@ -86,11 +71,10 @@ with st.spinner("πŸ“‘ Loading dataset from HuggingFace..."): if df_raw.empty: st.stop() -# ── Freshness check ─────────────────────────────────────────────────────────── freshness = check_data_freshness(df_raw) show_freshness_status(freshness) -# ── Dataset summary in sidebar ──────────────────────────────────────────────── +# ── Dataset info sidebar ────────────────────────────────────────────────────── with st.sidebar: st.divider() st.subheader("πŸ“¦ Dataset Info") @@ -103,21 +87,21 @@ with st.sidebar: st.write(f"**Macro:** {', '.join(summary['macro_found'])}") st.write(f"**T-bill col:** {'βœ…' if summary['tbill_found'] else '❌'}") -# ── Wait for run button ─────────────────────────────────────────────────────── + with st.expander("πŸ” All columns"): + st.write(summary["all_cols"]) + if not run_button: - st.info("πŸ‘ˆ Configure parameters in the sidebar and click **πŸš€ Run All 3 Approaches**.") + st.info("πŸ‘ˆ Configure parameters and click **πŸš€ Run All 3 Approaches**.") st.stop() # ── Filter by start year ────────────────────────────────────────────────────── df = df_raw[df_raw.index.year >= start_yr].copy() -st.write( - f"πŸ“… **Data:** {df.index[0].strftime('%Y-%m-%d')} β†’ {df.index[-1].strftime('%Y-%m-%d')} " - f"({df.index[-1].year - df.index[0].year + 1} years)" -) +st.write(f"πŸ“… **Data:** {df.index[0].strftime('%Y-%m-%d')} β†’ {df.index[-1].strftime('%Y-%m-%d')} " + f"({df.index[-1].year - df.index[0].year + 1} years)") # ── Features & targets ──────────────────────────────────────────────────────── try: - input_features, target_etfs, tbill_rate, df = get_features_and_targets(df) + input_features, target_etfs, tbill_rate, df, col_info = get_features_and_targets(df) except ValueError as e: st.error(str(e)) st.stop() @@ -125,6 +109,18 @@ except ValueError as e: 
n_etfs = len(target_etfs) n_classes = n_etfs + (1 if include_cash else 0) +# ── Show column detection diagnostics ──────────────────────────────────────── +with st.expander("πŸ”¬ Column detection diagnostics", expanded=False): + st.write("**How each ETF column was interpreted:**") + for col, info in col_info.items(): + st.write(f"- `{col}`: {info}") + st.write(f"**Input features ({len(input_features)}):** {input_features}") + st.write(f"**T-bill rate used:** {tbill_rate*100:.3f}%") + + # Show sample return values to verify correctness + st.write("**Sample target return values (last 3 rows):**") + st.dataframe(df[target_etfs].tail(3)) + st.info( f"🎯 **Targets:** {', '.join([t.replace('_Ret','') for t in target_etfs])} Β· " f"**Features:** {len(input_features)} signals Β· " @@ -135,13 +131,20 @@ st.info( X_raw = df[input_features].values.astype(np.float32) y_raw = df[target_etfs].values.astype(np.float32) -# Fill any remaining NaNs with column means +# Fill NaNs col_means = np.nanmean(X_raw, axis=0) for j in range(X_raw.shape[1]): mask = np.isnan(X_raw[:, j]) if mask.any(): X_raw[mask, j] = col_means[j] +# Also fill NaNs in y_raw +y_means = np.nanmean(y_raw, axis=0) +for j in range(y_raw.shape[1]): + mask = np.isnan(y_raw[:, j]) + if mask.any(): + y_raw[mask, j] = y_means[j] + X_seq, y_seq = build_sequences(X_raw, y_raw, lookback) y_labels = returns_to_labels(y_seq, include_cash=include_cash) @@ -154,27 +157,30 @@ X_train_s, X_val_s, X_test_s, _ = scale_features(X_train, X_val, X_test) train_size = len(X_train) val_size = len(X_val) - test_start = lookback + train_size + val_size test_dates = df.index[test_start: test_start + len(X_test)] test_slice = slice(test_start, test_start + len(X_test)) -st.success( - f"βœ… Sequences β€” Train: {train_size:,} Β· Val: {val_size:,} Β· Test: {len(X_test):,}" -) +st.success(f"βœ… Sequences β€” Train: {train_size:,} Β· Val: {val_size:,} Β· Test: {len(X_test):,}") + +# Show class distribution to check for degenerate labels +with 
st.expander("πŸ”¬ Label distribution (train set)", expanded=False): + unique, counts = np.unique(y_train_l, return_counts=True) + label_names = [target_etfs[i].replace("_Ret","") if i < n_etfs else "CASH" for i in unique] + dist_df = pd.DataFrame({"Class": label_names, "Count": counts, + "Pct": (counts / counts.sum() * 100).round(1)}) + st.dataframe(dist_df) # ── Train all three approaches ──────────────────────────────────────────────── results = {} trained_info = {} +progress = st.progress(0, text="Starting training...") -progress = st.progress(0, text="Starting training...") - -# ── Approach 1 ──────────────────────────────────────────────────────────────── +# Approach 1 with st.spinner("🌊 Training Approach 1 β€” Wavelet CNN-LSTM..."): try: model1, hist1, _ = train_approach1( - X_train_s, y_train_l, - X_val_s, y_val_l, + X_train_s, y_train_l, X_val_s, y_val_l, n_classes=n_classes, epochs=int(epochs), ) preds1, proba1 = predict_approach1(model1, X_test_s) @@ -190,17 +196,13 @@ with st.spinner("🌊 Training Approach 1 β€” Wavelet CNN-LSTM..."): progress.progress(33, text="Approach 1 done...") -# ── Approach 2 ──────────────────────────────────────────────────────────────── +# Approach 2 with st.spinner("πŸ”€ Training Approach 2 β€” Regime-Conditioned CNN-LSTM..."): try: model2, hist2, hmm2, regime_cols2 = train_approach2( - X_train_s, y_train_l, - X_val_s, y_val_l, - X_flat_all=X_raw, - feature_names=input_features, - lookback=lookback, - train_size=train_size, - val_size=val_size, + X_train_s, y_train_l, X_val_s, y_val_l, + X_flat_all=X_raw, feature_names=input_features, + lookback=lookback, train_size=train_size, val_size=val_size, n_classes=n_classes, epochs=int(epochs), ) preds2, proba2 = predict_approach2( @@ -219,12 +221,11 @@ with st.spinner("πŸ”€ Training Approach 2 β€” Regime-Conditioned CNN-LSTM..."): progress.progress(66, text="Approach 2 done...") -# ── Approach 3 ──────────────────────────────────────────────────────────────── +# Approach 3 with 
st.spinner("πŸ“‘ Training Approach 3 β€” Multi-Scale CNN-LSTM..."): try: model3, hist3 = train_approach3( - X_train_s, y_train_l, - X_val_s, y_val_l, + X_train_s, y_train_l, X_val_s, y_val_l, n_classes=n_classes, epochs=int(epochs), ) preds3, proba3 = predict_approach3(model3, X_test_s) @@ -250,41 +251,29 @@ if winner_res is None: st.stop() next_date = get_next_signal_date() - st.divider() -# ── Signal banner ───────────────────────────────────────────────────────────── show_signal_banner(winner_res["next_signal"], next_date, winner_name) -# ── Conviction panel ────────────────────────────────────────────────────────── winner_proba = trained_info[winner_name]["proba"] conviction = compute_conviction(winner_proba[-1], target_etfs, include_cash) show_conviction_panel(conviction) st.divider() - -# ── Winner metrics ──────────────────────────────────────────────────────────── st.subheader(f"πŸ“Š {winner_name} β€” Performance Metrics") show_metrics_row(winner_res, tbill_rate) st.divider() - -# ── Comparison table ────────────────────────────────────────────────────────── st.subheader("πŸ† Approach Comparison (Winner = Highest Raw Annualised Return)") comparison_df = build_comparison_table(results, winner_name) show_comparison_table(comparison_df) - st.plotly_chart(comparison_bar_chart(results, winner_name), use_container_width=True) st.divider() - -# ── Equity curves ───────────────────────────────────────────────────────────── st.subheader("πŸ“ˆ Out-of-Sample Equity Curves β€” All Approaches vs Benchmarks") fig = equity_curve_chart(results, winner_name, test_dates, df, test_slice, tbill_rate) st.plotly_chart(fig, use_container_width=True) st.divider() - -# ── Audit trail ─────────────────────────────────────────────────────────────── st.subheader(f"πŸ“‹ Audit Trail β€” {winner_name} (Last 20 Trading Days)") show_audit_trail(winner_res["audit_trail"]) diff --git 
a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/data/loader.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/data/loader.py index ae2533008e483f244c7dd3a1e189bb64e4488a4f..f7484b3e44c030816ad8f2d236eeedf500b4b341 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/data/loader.py +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/data/loader.py @@ -4,7 +4,7 @@ Loads master_data.parquet from HF Dataset. Validates freshness against the last NYSE trading day. No external pings β€” all data comes from HF Dataset only. 
-Actual dataset columns (from parquet inspection): +Actual dataset columns (confirmed from parquet inspection): ETFs : AGG, GLD, SLV, SPY, TBT, TLT, VNQ Macro : VIX, DXY, T10Y2Y, TBILL_3M, IG_SPREAD, HY_SPREAD """ @@ -15,7 +15,6 @@ import streamlit as st from huggingface_hub import hf_hub_download from datetime import datetime, timedelta import pytz -import os try: import pandas_market_calendars as mcal @@ -26,33 +25,27 @@ except ImportError: DATASET_REPO = "P2SAMAPA/fi-etf-macro-signal-master-data" PARQUET_FILE = "master_data.parquet" -# ── Actual column names in the dataset ─────────────────────────────────────── -TARGET_ETF_COLS = ["TLT", "TBT", "VNQ", "SLV", "GLD"] # traded ETFs -BENCHMARK_COLS = ["SPY", "AGG"] # chart only -TBILL_COL = "TBILL_3M" # 3m T-bill rate -MACRO_COLS = ["VIX", "DXY", "T10Y2Y", "IG_SPREAD", "HY_SPREAD"] +TARGET_ETF_COLS = ["TLT", "TBT", "VNQ", "SLV", "GLD"] +BENCHMARK_COLS = ["SPY", "AGG"] +TBILL_COL = "TBILL_3M" +MACRO_COLS = ["VIX", "DXY", "T10Y2Y", "IG_SPREAD", "HY_SPREAD"] -# ── NYSE calendar helpers ───────────────────────────────────────────────────── +# ── NYSE calendar ───────────────────────────────────────────────────────────── def get_last_nyse_trading_day(as_of=None): - """Return the most recent NYSE trading day on or before as_of (default: today EST).""" est = pytz.timezone("US/Eastern") if as_of is None: as_of = datetime.now(est) today = as_of.date() - if NYSE_CAL_AVAILABLE: try: nyse = mcal.get_calendar("NYSE") - start = today - timedelta(days=10) - sched = nyse.schedule(start_date=start, end_date=today) + sched = nyse.schedule(start_date=today - timedelta(days=10), end_date=today) if len(sched) > 0: return sched.index[-1].date() except Exception: pass - - # Fallback: skip weekends candidate = today while candidate.weekday() >= 5: candidate -= timedelta(days=1) @@ -63,10 +56,6 @@ def get_last_nyse_trading_day(as_of=None): @st.cache_data(ttl=3600, show_spinner=False) def load_dataset(hf_token: str) -> pd.DataFrame: - """ - 
Download master_data.parquet from HF Dataset and return as DataFrame. - Cached for 1 hour. Index is parsed as DatetimeIndex. - """ try: path = hf_hub_download( repo_id=DATASET_REPO, @@ -76,7 +65,6 @@ def load_dataset(hf_token: str) -> pd.DataFrame: ) df = pd.read_parquet(path) - # Ensure DatetimeIndex if not isinstance(df.index, pd.DatetimeIndex): for col in ["Date", "date", "DATE"]: if col in df.columns: @@ -84,66 +72,66 @@ def load_dataset(hf_token: str) -> pd.DataFrame: break df.index = pd.to_datetime(df.index) - df = df.sort_index() - return df + return df.sort_index() except Exception as e: - st.error(f"❌ Failed to load dataset from HuggingFace: {e}") + st.error(f"❌ Failed to load dataset: {e}") return pd.DataFrame() # ── Freshness check ─────────────────────────────────────────────────────────── def check_data_freshness(df: pd.DataFrame) -> dict: - """ - Check whether the dataset contains data for the last NYSE trading day. - """ if df.empty: - return { - "fresh": False, - "last_date_in_data": None, - "expected_date": None, - "message": "Dataset is empty.", - } - - last_date_in_data = df.index[-1].date() - expected_date = get_last_nyse_trading_day() - fresh = last_date_in_data >= expected_date - - if fresh: - message = f"βœ… Dataset is up to date through **{last_date_in_data}**." - else: - message = ( - f"⚠️ **{expected_date}** data not yet updated in dataset. " - f"Latest available: **{last_date_in_data}**. " - f"Please check back later β€” the dataset updates daily after market close." - ) + return {"fresh": False, "message": "Dataset is empty."} - return { - "fresh": fresh, - "last_date_in_data": last_date_in_data, - "expected_date": expected_date, - "message": message, - } + last = df.index[-1].date() + expect = get_last_nyse_trading_day() + fresh = last >= expect + + msg = ( + f"βœ… Dataset up to date through **{last}**." if fresh else + f"⚠️ **{expect}** data not yet updated. Latest: **{last}**. " + f"Dataset updates daily after market close." 
+ ) + return {"fresh": fresh, "last_date_in_data": last, + "expected_date": expect, "message": msg} + + +# ── Detect whether a column holds prices or returns ─────────────────────────── + +def _is_price_series(series: pd.Series) -> bool: + """ + Heuristic: a price series has abs(median) > 2 and std/mean < 0.5. + A return series has abs(median) < 0.1 and many values near zero. + """ + clean = series.dropna() + if len(clean) == 0: + return False + med = abs(clean.median()) + # Strong price signal: median > 2 (e.g. TLT ~ 90, TBT ~ 20) + if med > 2: + return True + # Strong return signal: most values between -0.2 and 0.2 + if (clean.abs() < 0.2).mean() > 0.9: + return False + return med > 0.5 # ── Feature / target extraction ─────────────────────────────────────────────── def get_features_and_targets(df: pd.DataFrame): """ - Extract input feature columns and target ETF return columns. - - The dataset stores raw price or return values directly under ticker names. - We compute daily log returns for target ETFs if they are not already returns. + Build return columns for target ETFs and benchmarks. + Auto-detects whether source columns are prices or already returns. Returns: - input_features : list of column names to use as model inputs - target_etfs : list of ETF column names (after return computation) - tbill_rate : latest 3m T-bill rate as float (annualised, e.g. 0.045) - df : DataFrame (possibly with new _Ret columns added) + input_features : list[str] + target_etfs : list[str] e.g. ["TLT_Ret", ...] 
+ tbill_rate : float + df : DataFrame with _Ret columns added + col_info : dict of diagnostics for sidebar display """ - - # ── Confirm target ETFs exist ───────────────────────────────────────────── missing = [c for c in TARGET_ETF_COLS if c not in df.columns] if missing: raise ValueError( @@ -151,71 +139,75 @@ def get_features_and_targets(df: pd.DataFrame): f"Found in dataset: {list(df.columns)}" ) - # ── Build return columns ────────────────────────────────────────────────── - # If values look like prices (>5), compute pct returns. - # If they already look like small returns (<1 in abs), use as-is. - target_etfs = [] - for col in TARGET_ETF_COLS: - ret_col = f"{col}_Ret" - if ret_col not in df.columns: - sample = df[col].dropna() - if len(sample) > 0 and abs(sample.median()) > 1: - # Looks like price β€” compute pct change - df[ret_col] = df[col].pct_change() - else: - # Already returns - df[ret_col] = df[col] - target_etfs.append(ret_col) - - # Same for benchmarks - for col in BENCHMARK_COLS: - ret_col = f"{col}_Ret" - if ret_col not in df.columns and col in df.columns: - sample = df[col].dropna() - if len(sample) > 0 and abs(sample.median()) > 1: - df[ret_col] = df[col].pct_change() - else: - df[ret_col] = df[col] + col_info = {} - # Drop rows with NaN in target columns (first row after pct_change) - df = df.dropna(subset=target_etfs) + # ── Build _Ret columns ──────────────────────────────────────────────────── + def make_ret(col): + ret_col = f"{col}_Ret" + if ret_col in df.columns: + col_info[col] = "pre-computed _Ret" + return ret_col + if _is_price_series(df[col]): + df[ret_col] = df[col].pct_change() + col_info[col] = f"priceβ†’pct_change (median={df[col].median():.2f})" + else: + df[ret_col] = df[col] + col_info[col] = f"used as-is (median={df[col].median():.4f})" + return ret_col + + target_etfs = [make_ret(c) for c in TARGET_ETF_COLS] + benchmark_rets = [make_ret(c) for c in BENCHMARK_COLS if c in df.columns] + + # Drop NaN rows (first row from 
pct_change) + df = df.dropna(subset=target_etfs).copy() + + # Sanity check: target returns should be small daily values + for ret_col in target_etfs: + med = df[ret_col].abs().median() + if med > 0.1: + st.warning( + f"⚠️ {ret_col} has median absolute value {med:.4f} β€” " + f"these may not be daily returns. Check dataset column '{ret_col.replace('_Ret','')}'. " + f"Sample values: {df[ret_col].tail(3).values}" + ) # ── Input features ──────────────────────────────────────────────────────── - # Use macro columns directly; exclude ETF price/return cols and benchmarks exclude = set( - TARGET_ETF_COLS + BENCHMARK_COLS + - target_etfs + - [f"{c}_Ret" for c in BENCHMARK_COLS] + - [TBILL_COL] + TARGET_ETF_COLS + BENCHMARK_COLS + target_etfs + benchmark_rets + + [f"{c}_Ret" for c in BENCHMARK_COLS] + [TBILL_COL] ) - input_features = [ + # First try known macro columns + input_features = [c for c in MACRO_COLS if c in df.columns and c not in exclude] + + # Then add any engineered signal columns + extra = [ c for c in df.columns if c not in exclude - and c in (MACRO_COLS + [ - col for col in df.columns - if any(k in col for k in ["_Z", "_Vol", "Regime", "YC_", "Credit_", - "Rates_", "VIX_", "Spread", "DXY", "T10Y"]) - ]) + and c not in input_features + and any(k in c for k in ["_Z", "_Vol", "Regime", "YC_", "Credit_", + "Rates_", "VIX_", "Spread", "DXY", "T10Y", + "TBILL", "SOFR", "MOVE"]) + and pd.api.types.is_numeric_dtype(df[c]) ] + input_features += extra - # Fallback: if none matched, use all non-excluded numeric columns + # Fallback: all numeric non-excluded columns if not input_features: input_features = [ c for c in df.columns - if c not in exclude - and pd.api.types.is_numeric_dtype(df[c]) + if c not in exclude and pd.api.types.is_numeric_dtype(df[c]) ] # ── T-bill rate ─────────────────────────────────────────────────────────── - tbill_rate = 0.045 # default + tbill_rate = 0.045 if TBILL_COL in df.columns: raw = df[TBILL_COL].dropna() if len(raw) > 0: - last_val = 
float(raw.iloc[-1]) - tbill_rate = last_val / 100 if last_val > 1 else last_val + v = float(raw.iloc[-1]) + tbill_rate = v / 100 if v > 1 else v - return input_features, target_etfs, tbill_rate, df + return input_features, target_etfs, tbill_rate, df, col_info # ── Dataset summary ─────────────────────────────────────────────────────────── @@ -228,8 +220,9 @@ def dataset_summary(df: pd.DataFrame) -> dict: "columns": len(df.columns), "start_date": df.index[0].strftime("%Y-%m-%d"), "end_date": df.index[-1].strftime("%Y-%m-%d"), - "etfs_found": [c for c in TARGET_ETF_COLS if c in df.columns], - "benchmarks": [c for c in BENCHMARK_COLS if c in df.columns], - "macro_found": [c for c in MACRO_COLS if c in df.columns], + "etfs_found": [c for c in TARGET_ETF_COLS if c in df.columns], + "benchmarks": [c for c in BENCHMARK_COLS if c in df.columns], + "macro_found": [c for c in MACRO_COLS if c in df.columns], "tbill_found": TBILL_COL in df.columns, + "all_cols": list(df.columns), } diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py index eded65d240364585407db8613bb5c2acdbe1d7d9..97ceca9077e0805a4127a483233c87465d782c6e 100644 --- 
a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py @@ -10,9 +10,11 @@ import pandas as pd import numpy as np # ── Module imports ──────────────────────────────────────────────────────────── -from data.loader import load_dataset, check_data_freshness, get_features_and_targets, dataset_summary +from data.loader import (load_dataset, check_data_freshness, + get_features_and_targets, dataset_summary) from utils.calendar import get_est_time, is_sync_window, get_next_signal_date -from models.base import build_sequences, train_val_test_split, scale_features, returns_to_labels +from models.base import (build_sequences, train_val_test_split, + scale_features, returns_to_labels) from models.approach1_wavelet import train_approach1, predict_approach1 from models.approach2_regime import train_approach2, predict_approach2 from models.approach3_multiscale import train_approach3, predict_approach3 @@ -47,10 +49,10 @@ with st.sidebar: st.divider() - start_yr = st.slider("πŸ“… Start Year", 2010, 2024, 2016) - fee_bps = st.slider("πŸ’° Fee (bps)", 0, 50, 10) - lookback = st.slider("πŸ“ Lookback (days)", 20, 60, 30, step=5) - epochs = st.number_input("πŸ” Max Epochs", 20, 300, 100, step=10) + start_yr = st.slider("πŸ“… Start Year", 2010, 2024, 2016) + fee_bps = st.slider("πŸ’° Fee (bps)", 0, 50, 10) + lookback = st.slider("πŸ“ Lookback (days)", 20, 60, 30, step=5) + epochs = 
st.number_input("πŸ” Max Epochs", 20, 300, 100, step=10) st.divider() @@ -58,8 +60,10 @@ with st.sidebar: split_map = {"70/15/15": (0.70, 0.15), "80/10/10": (0.80, 0.10)} train_pct, val_pct = split_map[split_option] - include_cash = st.checkbox("πŸ’΅ Include CASH class", value=True, - help="Model can select CASH (earns T-bill rate) as an alternative to any ETF") + include_cash = st.checkbox( + "πŸ’΅ Include CASH class", value=True, + help="Model can select CASH (earns T-bill rate) instead of any ETF", + ) st.divider() @@ -70,90 +74,102 @@ st.title("🧠 P2-ETF-CNN-LSTM") st.caption("Approach 1: Wavelet Β· Approach 2: Regime-Conditioned Β· Approach 3: Multi-Scale Parallel") st.caption("Winner selected by highest raw annualised return on out-of-sample test set.") -# ── Load data (always, to check freshness) ──────────────────────────────────── +# ── Token check ─────────────────────────────────────────────────────────────── if not HF_TOKEN: - st.error("❌ HF_TOKEN secret not found. Please add it to your HF Space / GitHub secrets.") + st.error("❌ HF_TOKEN secret not found. 
Add it to HF Space / GitHub secrets.") st.stop() +# ── Load dataset ────────────────────────────────────────────────────────────── with st.spinner("πŸ“‘ Loading dataset from HuggingFace..."): - df = load_dataset(HF_TOKEN) + df_raw = load_dataset(HF_TOKEN) -if df.empty: +if df_raw.empty: st.stop() # ── Freshness check ─────────────────────────────────────────────────────────── -freshness = check_data_freshness(df) +freshness = check_data_freshness(df_raw) show_freshness_status(freshness) # ── Dataset summary in sidebar ──────────────────────────────────────────────── with st.sidebar: st.divider() st.subheader("πŸ“¦ Dataset Info") - summary = dataset_summary(df) + summary = dataset_summary(df_raw) if summary: st.write(f"**Rows:** {summary['rows']:,}") st.write(f"**Range:** {summary['start_date']} β†’ {summary['end_date']}") - st.write(f"**ETFs:** {', '.join([e.replace('_Ret','') for e in summary['etfs_found']])}") - st.write(f"**Benchmarks:** {', '.join([b.replace('_Ret','') for b in summary['benchmarks']])}") + st.write(f"**ETFs:** {', '.join(summary['etfs_found'])}") + st.write(f"**Benchmarks:** {', '.join(summary['benchmarks'])}") + st.write(f"**Macro:** {', '.join(summary['macro_found'])}") st.write(f"**T-bill col:** {'βœ…' if summary['tbill_found'] else '❌'}") -# ── Main execution ──────────────────────────────────────────────────────────── +# ── Wait for run button ─────────────────────────────────────────────────────── if not run_button: - st.info("πŸ‘ˆ Configure parameters in the sidebar and click **πŸš€ Run All 3 Approaches** to begin.") + st.info("πŸ‘ˆ Configure parameters in the sidebar and click **πŸš€ Run All 3 Approaches**.") st.stop() # ── Filter by start year ────────────────────────────────────────────────────── -df = df[df.index.year >= start_yr].copy() -st.write(f"πŸ“… **Data:** {df.index[0].strftime('%Y-%m-%d')} β†’ {df.index[-1].strftime('%Y-%m-%d')} " - f"({df.index[-1].year - df.index[0].year + 1} years)") +df = df_raw[df_raw.index.year >= 
start_yr].copy() +st.write( + f"πŸ“… **Data:** {df.index[0].strftime('%Y-%m-%d')} β†’ {df.index[-1].strftime('%Y-%m-%d')} " + f"({df.index[-1].year - df.index[0].year + 1} years)" +) -# ── Feature / target extraction ─────────────────────────────────────────────── +# ── Features & targets ──────────────────────────────────────────────────────── try: - input_features, target_etfs, tbill_rate = get_features_and_targets(df) + input_features, target_etfs, tbill_rate, df = get_features_and_targets(df) except ValueError as e: st.error(str(e)) st.stop() -st.info(f"🎯 **Targets:** {len(target_etfs)} ETFs Β· **Features:** {len(input_features)} signals Β· " - f"**T-bill rate:** {tbill_rate*100:.2f}%") +n_etfs = len(target_etfs) +n_classes = n_etfs + (1 if include_cash else 0) + +st.info( + f"🎯 **Targets:** {', '.join([t.replace('_Ret','') for t in target_etfs])} Β· " + f"**Features:** {len(input_features)} signals Β· " + f"**T-bill:** {tbill_rate*100:.2f}%" +) -# ── Prepare sequences ───────────────────────────────────────────────────────── -X_raw = df[input_features].values.astype(np.float32) -y_raw = df[target_etfs].values.astype(np.float32) -n_etfs = len(target_etfs) -n_classes = n_etfs + (1 if include_cash else 0) # +1 for CASH +# ── Build sequences ─────────────────────────────────────────────────────────── +X_raw = df[input_features].values.astype(np.float32) +y_raw = df[target_etfs].values.astype(np.float32) -# Fill NaNs with column means +# Fill any remaining NaNs with column means col_means = np.nanmean(X_raw, axis=0) for j in range(X_raw.shape[1]): mask = np.isnan(X_raw[:, j]) - X_raw[mask, j] = col_means[j] + if mask.any(): + X_raw[mask, j] = col_means[j] X_seq, y_seq = build_sequences(X_raw, y_raw, lookback) y_labels = returns_to_labels(y_seq, include_cash=include_cash) -X_train, y_train_r, X_val, y_val_r, X_test, y_test_r = train_val_test_split(X_seq, y_seq, train_pct, val_pct) -_, y_train_l, _, y_val_l, _, y_test_l = train_val_test_split(X_seq, y_labels, 
train_pct, val_pct) +(X_train, y_train_r, X_val, y_val_r, + X_test, y_test_r) = train_val_test_split(X_seq, y_seq, train_pct, val_pct) +(_, y_train_l, _, y_val_l, + _, y_test_l) = train_val_test_split(X_seq, y_labels, train_pct, val_pct) X_train_s, X_val_s, X_test_s, _ = scale_features(X_train, X_val, X_test) train_size = len(X_train) val_size = len(X_val) -# Test dates (aligned with y_test) -test_start = lookback + train_size + val_size -test_dates = df.index[test_start: test_start + len(X_test)] -test_slice = slice(test_start, test_start + len(X_test)) +test_start = lookback + train_size + val_size +test_dates = df.index[test_start: test_start + len(X_test)] +test_slice = slice(test_start, test_start + len(X_test)) -st.success(f"βœ… Sequences β€” Train: {train_size} Β· Val: {val_size} Β· Test: {len(X_test)}") +st.success( + f"βœ… Sequences β€” Train: {train_size:,} Β· Val: {val_size:,} Β· Test: {len(X_test):,}" +) # ── Train all three approaches ──────────────────────────────────────────────── results = {} -trained_info = {} # store extra info needed for conviction +trained_info = {} progress = st.progress(0, text="Starting training...") -# ── Approach 1: Wavelet ─────────────────────────────────────────────────────── +# ── Approach 1 ──────────────────────────────────────────────────────────────── with st.spinner("🌊 Training Approach 1 β€” Wavelet CNN-LSTM..."): try: model1, hist1, _ = train_approach1( @@ -163,7 +179,8 @@ with st.spinner("🌊 Training Approach 1 β€” Wavelet CNN-LSTM..."): ) preds1, proba1 = predict_approach1(model1, X_test_s) results["Approach 1"] = execute_strategy( - preds1, proba1, y_test_r, test_dates, target_etfs, fee_bps, tbill_rate, include_cash, + preds1, proba1, y_test_r, test_dates, + target_etfs, fee_bps, tbill_rate, include_cash, ) trained_info["Approach 1"] = {"proba": proba1} st.success("βœ… Approach 1 complete") @@ -173,7 +190,7 @@ with st.spinner("🌊 Training Approach 1 β€” Wavelet CNN-LSTM..."): progress.progress(33, text="Approach 
1 done...") -# ── Approach 2: Regime-Conditioned ─────────────────────────────────────────── +# ── Approach 2 ──────────────────────────────────────────────────────────────── with st.spinner("πŸ”€ Training Approach 2 β€” Regime-Conditioned CNN-LSTM..."): try: model2, hist2, hmm2, regime_cols2 = train_approach2( @@ -191,7 +208,8 @@ with st.spinner("πŸ”€ Training Approach 2 β€” Regime-Conditioned CNN-LSTM..."): lookback, train_size, val_size, ) results["Approach 2"] = execute_strategy( - preds2, proba2, y_test_r, test_dates, target_etfs, fee_bps, tbill_rate, include_cash, + preds2, proba2, y_test_r, test_dates, + target_etfs, fee_bps, tbill_rate, include_cash, ) trained_info["Approach 2"] = {"proba": proba2} st.success("βœ… Approach 2 complete") @@ -201,7 +219,7 @@ with st.spinner("πŸ”€ Training Approach 2 β€” Regime-Conditioned CNN-LSTM..."): progress.progress(66, text="Approach 2 done...") -# ── Approach 3: Multi-Scale ─────────────────────────────────────────────────── +# ── Approach 3 ──────────────────────────────────────────────────────────────── with st.spinner("πŸ“‘ Training Approach 3 β€” Multi-Scale CNN-LSTM..."): try: model3, hist3 = train_approach3( @@ -211,7 +229,8 @@ with st.spinner("πŸ“‘ Training Approach 3 β€” Multi-Scale CNN-LSTM..."): ) preds3, proba3 = predict_approach3(model3, X_test_s) results["Approach 3"] = execute_strategy( - preds3, proba3, y_test_r, test_dates, target_etfs, fee_bps, tbill_rate, include_cash, + preds3, proba3, y_test_r, test_dates, + target_etfs, fee_bps, tbill_rate, include_cash, ) trained_info["Approach 3"] = {"proba": proba3} st.success("βœ… Approach 3 complete") @@ -227,15 +246,14 @@ winner_name = select_winner(results) winner_res = results.get(winner_name) if winner_res is None: - st.error("❌ All approaches failed. Please check your data and configuration.") + st.error("❌ All approaches failed. 
Please check data and configuration.") st.stop() -# ── Next trading date ───────────────────────────────────────────────────────── next_date = get_next_signal_date() st.divider() -# ── Signal banner (winner) ──────────────────────────────────────────────────── +# ── Signal banner ───────────────────────────────────────────────────────────── show_signal_banner(winner_res["next_signal"], next_date, winner_name) # ── Conviction panel ────────────────────────────────────────────────────────── @@ -256,7 +274,6 @@ st.subheader("πŸ† Approach Comparison (Winner = Highest Raw Annualised Return)" comparison_df = build_comparison_table(results, winner_name) show_comparison_table(comparison_df) -# ── Comparison bar chart ────────────────────────────────────────────────────── st.plotly_chart(comparison_bar_chart(results, winner_name), use_container_width=True) st.divider() @@ -268,6 +285,6 @@ st.plotly_chart(fig, use_container_width=True) st.divider() -# ── Audit trail (winner) ────────────────────────────────────────────────────── +# ── Audit trail ─────────────────────────────────────────────────────────────── st.subheader(f"πŸ“‹ Audit Trail β€” {winner_name} (Last 20 Trading Days)") show_audit_trail(winner_res["audit_trail"]) diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/data/loader.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/data/loader.py index 
b96ee711d680e273094523bcd220f93b06eb486d..ae2533008e483f244c7dd3a1e189bb64e4488a4f 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/data/loader.py +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/data/loader.py @@ -3,6 +3,10 @@ data/loader.py Loads master_data.parquet from HF Dataset. Validates freshness against the last NYSE trading day. No external pings β€” all data comes from HF Dataset only. + +Actual dataset columns (from parquet inspection): + ETFs : AGG, GLD, SLV, SPY, TBT, TLT, VNQ + Macro : VIX, DXY, T10Y2Y, TBILL_3M, IG_SPREAD, HY_SPREAD """ import pandas as pd @@ -22,31 +26,29 @@ except ImportError: DATASET_REPO = "P2SAMAPA/fi-etf-macro-signal-master-data" PARQUET_FILE = "master_data.parquet" -# Columns expected in the dataset -REQUIRED_ETF_COLS = ["TLT_Ret", "TBT_Ret", "VNQ_Ret", "SLV_Ret", "GLD_Ret"] -BENCHMARK_COLS = ["SPY_Ret", "AGG_Ret"] -TBILL_COL = "DTB3" # 3m T-bill column in HF dataset -TARGET_ETFS = REQUIRED_ETF_COLS # 5 targets (no CASH in returns, CASH handled in strategy) +# ── Actual column names in the dataset ─────────────────────────────────────── +TARGET_ETF_COLS = ["TLT", "TBT", "VNQ", "SLV", "GLD"] # traded ETFs +BENCHMARK_COLS = ["SPY", "AGG"] # chart only +TBILL_COL = "TBILL_3M" # 3m T-bill rate +MACRO_COLS = ["VIX", "DXY", "T10Y2Y", "IG_SPREAD", "HY_SPREAD"] # ── NYSE calendar helpers ───────────────────────────────────────────────────── -def 
get_last_nyse_trading_day(as_of: datetime = None) -> datetime.date: - """Return the most recent NYSE trading day before or on as_of (default: today EST).""" +def get_last_nyse_trading_day(as_of=None): + """Return the most recent NYSE trading day on or before as_of (default: today EST).""" est = pytz.timezone("US/Eastern") if as_of is None: as_of = datetime.now(est) - today = as_of.date() if NYSE_CAL_AVAILABLE: try: - nyse = mcal.get_calendar("NYSE") - # Look back up to 10 days to find last trading day + nyse = mcal.get_calendar("NYSE") start = today - timedelta(days=10) - schedule = nyse.schedule(start_date=start, end_date=today) - if len(schedule) > 0: - return schedule.index[-1].date() + sched = nyse.schedule(start_date=start, end_date=today) + if len(sched) > 0: + return sched.index[-1].date() except Exception: pass @@ -57,18 +59,6 @@ def get_last_nyse_trading_day(as_of: datetime = None) -> datetime.date: return candidate -def is_nyse_trading_day(date) -> bool: - """Return True if date is a NYSE trading day.""" - if NYSE_CAL_AVAILABLE: - try: - nyse = mcal.get_calendar("NYSE") - schedule = nyse.schedule(start_date=date, end_date=date) - return len(schedule) > 0 - except Exception: - pass - return date.weekday() < 5 - - # ── Data loading ────────────────────────────────────────────────────────────── @st.cache_data(ttl=3600, show_spinner=False) @@ -88,10 +78,10 @@ def load_dataset(hf_token: str) -> pd.DataFrame: # Ensure DatetimeIndex if not isinstance(df.index, pd.DatetimeIndex): - if "Date" in df.columns: - df = df.set_index("Date") - elif "date" in df.columns: - df = df.set_index("date") + for col in ["Date", "date", "DATE"]: + if col in df.columns: + df = df.set_index(col) + break df.index = pd.to_datetime(df.index) df = df.sort_index() @@ -107,14 +97,6 @@ def load_dataset(hf_token: str) -> pd.DataFrame: def check_data_freshness(df: pd.DataFrame) -> dict: """ Check whether the dataset contains data for the last NYSE trading day. 
- - Returns a dict: - { - "fresh": bool, - "last_date_in_data": date, - "expected_date": date, - "message": str - } """ if df.empty: return { @@ -126,8 +108,7 @@ def check_data_freshness(df: pd.DataFrame) -> dict: last_date_in_data = df.index[-1].date() expected_date = get_last_nyse_trading_day() - - fresh = last_date_in_data >= expected_date + fresh = last_date_in_data >= expected_date if fresh: message = f"βœ… Dataset is up to date through **{last_date_in_data}**." @@ -150,66 +131,105 @@ def check_data_freshness(df: pd.DataFrame) -> dict: def get_features_and_targets(df: pd.DataFrame): """ - Extract input feature columns and target ETF return columns from the dataset. + Extract input feature columns and target ETF return columns. + + The dataset stores raw price or return values directly under ticker names. + We compute daily log returns for target ETFs if they are not already returns. Returns: - input_features : list of column names - target_etfs : list of ETF return column names (e.g. TLT_Ret) - tbill_rate : latest 3m T-bill rate as a float (annualised, e.g. 0.045) + input_features : list of column names to use as model inputs + target_etfs : list of ETF column names (after return computation) + tbill_rate : latest 3m T-bill rate as float (annualised, e.g. 0.045) + df : DataFrame (possibly with new _Ret columns added) """ - # Target ETF return columns - target_etfs = [c for c in REQUIRED_ETF_COLS if c in df.columns] - if not target_etfs: + # ── Confirm target ETFs exist ───────────────────────────────────────────── + missing = [c for c in TARGET_ETF_COLS if c not in df.columns] + if missing: raise ValueError( - f"No target ETF columns found. Expected: {REQUIRED_ETF_COLS}. " + f"Missing ETF columns: {missing}. 
" f"Found in dataset: {list(df.columns)}" ) - # Input features: Z-scores, vol, regime, yield curve, credit, rates, VIX terms - exclude = set(target_etfs + BENCHMARK_COLS + [TBILL_COL]) + # ── Build return columns ────────────────────────────────────────────────── + # If values look like prices (>5), compute pct returns. + # If they already look like small returns (<1 in abs), use as-is. + target_etfs = [] + for col in TARGET_ETF_COLS: + ret_col = f"{col}_Ret" + if ret_col not in df.columns: + sample = df[col].dropna() + if len(sample) > 0 and abs(sample.median()) > 1: + # Looks like price β€” compute pct change + df[ret_col] = df[col].pct_change() + else: + # Already returns + df[ret_col] = df[col] + target_etfs.append(ret_col) + + # Same for benchmarks + for col in BENCHMARK_COLS: + ret_col = f"{col}_Ret" + if ret_col not in df.columns and col in df.columns: + sample = df[col].dropna() + if len(sample) > 0 and abs(sample.median()) > 1: + df[ret_col] = df[col].pct_change() + else: + df[ret_col] = df[col] + + # Drop rows with NaN in target columns (first row after pct_change) + df = df.dropna(subset=target_etfs) + + # ── Input features ──────────────────────────────────────────────────────── + # Use macro columns directly; exclude ETF price/return cols and benchmarks + exclude = set( + TARGET_ETF_COLS + BENCHMARK_COLS + + target_etfs + + [f"{c}_Ret" for c in BENCHMARK_COLS] + + [TBILL_COL] + ) + input_features = [ c for c in df.columns if c not in exclude - and ( - c.endswith("_Z") - or c.endswith("_Vol") - or "Regime" in c - or "YC_" in c - or "Credit_" in c - or "Rates_" in c - or "VIX_" in c - or "Spread" in c - or "DXY" in c - or "VIX" in c - or "T10Y" in c - ) + and c in (MACRO_COLS + [ + col for col in df.columns + if any(k in col for k in ["_Z", "_Vol", "Regime", "YC_", "Credit_", + "Rates_", "VIX_", "Spread", "DXY", "T10Y"]) + ]) ] - # 3m T-bill rate (for CASH return & Sharpe) - tbill_rate = 0.045 # default fallback + # Fallback: if none matched, use all 
non-excluded numeric columns + if not input_features: + input_features = [ + c for c in df.columns + if c not in exclude + and pd.api.types.is_numeric_dtype(df[c]) + ] + + # ── T-bill rate ─────────────────────────────────────────────────────────── + tbill_rate = 0.045 # default if TBILL_COL in df.columns: raw = df[TBILL_COL].dropna() if len(raw) > 0: - last_val = raw.iloc[-1] - # DTB3 is typically in percent (e.g. 5.25 means 5.25%) - tbill_rate = float(last_val) / 100 if last_val > 1 else float(last_val) + last_val = float(raw.iloc[-1]) + tbill_rate = last_val / 100 if last_val > 1 else last_val - return input_features, target_etfs, tbill_rate + return input_features, target_etfs, tbill_rate, df -# ── Column info helper (for sidebar display) ────────────────────────────────── +# ── Dataset summary ─────────────────────────────────────────────────────────── def dataset_summary(df: pd.DataFrame) -> dict: - """Return a brief summary dict for sidebar display.""" if df.empty: return {} return { - "rows": len(df), - "columns": len(df.columns), - "start_date": df.index[0].strftime("%Y-%m-%d"), - "end_date": df.index[-1].strftime("%Y-%m-%d"), - "etfs_found": [c for c in REQUIRED_ETF_COLS if c in df.columns], - "benchmarks": [c for c in BENCHMARK_COLS if c in df.columns], + "rows": len(df), + "columns": len(df.columns), + "start_date": df.index[0].strftime("%Y-%m-%d"), + "end_date": df.index[-1].strftime("%Y-%m-%d"), + "etfs_found": [c for c in TARGET_ETF_COLS if c in df.columns], + "benchmarks": [c for c in BENCHMARK_COLS if c in df.columns], + "macro_found": [c for c in MACRO_COLS if c in df.columns], "tbill_found": TBILL_COL in df.columns, } diff --git 
a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/data/data/__init__.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/data/data/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc --- /dev/null +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/data/data/__init__.py @@ -0,0 +1 @@ + diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md 
b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md index 76b8435744e41334f32c6f1ad441d3b27fd29b2c..abfc7cb8aefe0be318b035c033e2ef83e1f2e0bf 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md @@ -1,3 +1,15 @@ +--- +title: P2-ETF-CNN-LSTM-ALTERNATIVE-APPROACHES +emoji: 🧠 +colorFrom: green +colorTo: blue +sdk: streamlit +sdk_version: "1.32.0" +python_version: "3.10" +app_file: app.py +pinned: false +--- + # P2-ETF-CNN-LSTM-ALTERNATIVE-APPROACHES Macro-driven ETF rotation using three augmented CNN-LSTM variants. 
diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/data/__init__.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/data/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc --- /dev/null +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/data/__init__.py @@ -0,0 +1 @@ + diff --git 
a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/data/loader.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/data/loader.py new file mode 100644 index 0000000000000000000000000000000000000000..b96ee711d680e273094523bcd220f93b06eb486d --- /dev/null +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/data/loader.py @@ -0,0 +1,215 @@ +""" +data/loader.py +Loads master_data.parquet from HF Dataset. 
+Validates freshness against the last NYSE trading day. +No external pings β€” all data comes from HF Dataset only. +""" + +import pandas as pd +import numpy as np +import streamlit as st +from huggingface_hub import hf_hub_download +from datetime import datetime, timedelta +import pytz +import os + +try: + import pandas_market_calendars as mcal + NYSE_CAL_AVAILABLE = True +except ImportError: + NYSE_CAL_AVAILABLE = False + +DATASET_REPO = "P2SAMAPA/fi-etf-macro-signal-master-data" +PARQUET_FILE = "master_data.parquet" + +# Columns expected in the dataset +REQUIRED_ETF_COLS = ["TLT_Ret", "TBT_Ret", "VNQ_Ret", "SLV_Ret", "GLD_Ret"] +BENCHMARK_COLS = ["SPY_Ret", "AGG_Ret"] +TBILL_COL = "DTB3" # 3m T-bill column in HF dataset +TARGET_ETFS = REQUIRED_ETF_COLS # 5 targets (no CASH in returns, CASH handled in strategy) + + +# ── NYSE calendar helpers ───────────────────────────────────────────────────── + +def get_last_nyse_trading_day(as_of: datetime = None) -> datetime.date: + """Return the most recent NYSE trading day before or on as_of (default: today EST).""" + est = pytz.timezone("US/Eastern") + if as_of is None: + as_of = datetime.now(est) + + today = as_of.date() + + if NYSE_CAL_AVAILABLE: + try: + nyse = mcal.get_calendar("NYSE") + # Look back up to 10 days to find last trading day + start = today - timedelta(days=10) + schedule = nyse.schedule(start_date=start, end_date=today) + if len(schedule) > 0: + return schedule.index[-1].date() + except Exception: + pass + + # Fallback: skip weekends + candidate = today + while candidate.weekday() >= 5: + candidate -= timedelta(days=1) + return candidate + + +def is_nyse_trading_day(date) -> bool: + """Return True if date is a NYSE trading day.""" + if NYSE_CAL_AVAILABLE: + try: + nyse = mcal.get_calendar("NYSE") + schedule = nyse.schedule(start_date=date, end_date=date) + return len(schedule) > 0 + except Exception: + pass + return date.weekday() < 5 + + +# ── Data loading 
────────────────────────────────────────────────────────────── + +@st.cache_data(ttl=3600, show_spinner=False) +def load_dataset(hf_token: str) -> pd.DataFrame: + """ + Download master_data.parquet from HF Dataset and return as DataFrame. + Cached for 1 hour. Index is parsed as DatetimeIndex. + """ + try: + path = hf_hub_download( + repo_id=DATASET_REPO, + filename=PARQUET_FILE, + repo_type="dataset", + token=hf_token, + ) + df = pd.read_parquet(path) + + # Ensure DatetimeIndex + if not isinstance(df.index, pd.DatetimeIndex): + if "Date" in df.columns: + df = df.set_index("Date") + elif "date" in df.columns: + df = df.set_index("date") + df.index = pd.to_datetime(df.index) + + df = df.sort_index() + return df + + except Exception as e: + st.error(f"❌ Failed to load dataset from HuggingFace: {e}") + return pd.DataFrame() + + +# ── Freshness check ─────────────────────────────────────────────────────────── + +def check_data_freshness(df: pd.DataFrame) -> dict: + """ + Check whether the dataset contains data for the last NYSE trading day. + + Returns a dict: + { + "fresh": bool, + "last_date_in_data": date, + "expected_date": date, + "message": str + } + """ + if df.empty: + return { + "fresh": False, + "last_date_in_data": None, + "expected_date": None, + "message": "Dataset is empty.", + } + + last_date_in_data = df.index[-1].date() + expected_date = get_last_nyse_trading_day() + + fresh = last_date_in_data >= expected_date + + if fresh: + message = f"βœ… Dataset is up to date through **{last_date_in_data}**." + else: + message = ( + f"⚠️ **{expected_date}** data not yet updated in dataset. " + f"Latest available: **{last_date_in_data}**. " + f"Please check back later β€” the dataset updates daily after market close." 
+ ) + + return { + "fresh": fresh, + "last_date_in_data": last_date_in_data, + "expected_date": expected_date, + "message": message, + } + + +# ── Feature / target extraction ─────────────────────────────────────────────── + +def get_features_and_targets(df: pd.DataFrame): + """ + Extract input feature columns and target ETF return columns from the dataset. + + Returns: + input_features : list of column names + target_etfs : list of ETF return column names (e.g. TLT_Ret) + tbill_rate : latest 3m T-bill rate as a float (annualised, e.g. 0.045) + """ + # Target ETF return columns + target_etfs = [c for c in REQUIRED_ETF_COLS if c in df.columns] + + if not target_etfs: + raise ValueError( + f"No target ETF columns found. Expected: {REQUIRED_ETF_COLS}. " + f"Found in dataset: {list(df.columns)}" + ) + + # Input features: Z-scores, vol, regime, yield curve, credit, rates, VIX terms + exclude = set(target_etfs + BENCHMARK_COLS + [TBILL_COL]) + input_features = [ + c for c in df.columns + if c not in exclude + and ( + c.endswith("_Z") + or c.endswith("_Vol") + or "Regime" in c + or "YC_" in c + or "Credit_" in c + or "Rates_" in c + or "VIX_" in c + or "Spread" in c + or "DXY" in c + or "VIX" in c + or "T10Y" in c + ) + ] + + # 3m T-bill rate (for CASH return & Sharpe) + tbill_rate = 0.045 # default fallback + if TBILL_COL in df.columns: + raw = df[TBILL_COL].dropna() + if len(raw) > 0: + last_val = raw.iloc[-1] + # DTB3 is typically in percent (e.g. 
5.25 means 5.25%) + tbill_rate = float(last_val) / 100 if last_val > 1 else float(last_val) + + return input_features, target_etfs, tbill_rate + + +# ── Column info helper (for sidebar display) ────────────────────────────────── + +def dataset_summary(df: pd.DataFrame) -> dict: + """Return a brief summary dict for sidebar display.""" + if df.empty: + return {} + return { + "rows": len(df), + "columns": len(df.columns), + "start_date": df.index[0].strftime("%Y-%m-%d"), + "end_date": df.index[-1].strftime("%Y-%m-%d"), + "etfs_found": [c for c in REQUIRED_ETF_COLS if c in df.columns], + "benchmarks": [c for c in BENCHMARK_COLS if c in df.columns], + "tbill_found": TBILL_COL in df.columns, + } diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md index db5c9c5e3605dc62b9c2835d44bba4f98c526886..76b8435744e41334f32c6f1ad441d3b27fd29b2c 100644 --- 
a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md @@ -1,19 +1,115 @@ +# P2-ETF-CNN-LSTM-ALTERNATIVE-APPROACHES + +Macro-driven ETF rotation using three augmented CNN-LSTM variants. +Winner selected by **highest raw annualised return** on the out-of-sample test set. 
+ +--- + +## Architecture Overview + +| Approach | Core Idea | Key Addition | +|---|---|---| +| **1 β€” Wavelet** | DWT decomposes each macro signal into frequency subbands before the CNN | Separates trend / cycle / noise | +| **2 β€” Regime-Conditioned** | HMM detects macro regimes; one-hot regime label concatenated into the network | Removes non-stationarity | +| **3 β€” Multi-Scale Parallel** | Three CNN towers (kernels 3, 7, 21 days) run in parallel before the LSTM | Captures momentum + cycle + trend simultaneously | + --- -title: P2 ETF CNN LSTM ALTERNATIVE APPROACHES -emoji: πŸš€ -colorFrom: red -colorTo: red -sdk: docker -app_port: 8501 -tags: -- streamlit -pinned: false -short_description: Streamlit template space + +## ETF Universe + +| Ticker | Description | +|---|---| +| TLT | 20+ Year Treasury Bond | +| TBT | 20+ Year Treasury Short (2Γ—) | +| VNQ | Real Estate (REIT) | +| SLV | Silver | +| GLD | Gold | +| CASH | 3m T-bill rate (from HF dataset) | + +Benchmarks (chart only, not traded): **SPY**, **AGG** + +--- + +## Data + +All data sourced exclusively from: +**`P2SAMAPA/fi-etf-macro-signal-master-data`** (HuggingFace Dataset) +File: `master_data.parquet` + +No external API calls (no yfinance, no FRED). +The app checks daily whether the prior NYSE trading day's data is present in the dataset. + --- -# Welcome to Streamlit! 
+## Project Structure + +``` +β”œβ”€β”€ .github/ +β”‚ └── workflows/ +β”‚ └── sync.yml # Auto-sync GitHub β†’ HF Space on push to main +β”‚ +β”œβ”€β”€ app.py # Streamlit orchestrator (UI wiring only) +β”‚ +β”œβ”€β”€ data/ +β”‚ └── loader.py # HF dataset load, freshness check, column validation +β”‚ +β”œβ”€β”€ models/ +β”‚ β”œβ”€β”€ base.py # Shared: sequences, splits, scaling, callbacks +β”‚ β”œβ”€β”€ approach1_wavelet.py # Wavelet CNN-LSTM +β”‚ β”œβ”€β”€ approach2_regime.py # Regime-Conditioned CNN-LSTM +β”‚ └── approach3_multiscale.py # Multi-Scale Parallel CNN-LSTM +β”‚ +β”œβ”€β”€ strategy/ +β”‚ └── backtest.py # execute_strategy, metrics, winner selection +β”‚ +β”œβ”€β”€ signals/ +β”‚ └── conviction.py # Z-score conviction scoring +β”‚ +β”œβ”€β”€ ui/ +β”‚ β”œβ”€β”€ components.py # Banner, conviction panel, metrics, audit trail +β”‚ └── charts.py # Plotly equity curve + comparison bar chart +β”‚ +β”œβ”€β”€ utils/ +β”‚ └── calendar.py # NYSE calendar, next trading day, EST time +β”‚ +β”œβ”€β”€ requirements.txt +└── README.md +``` + +--- + +## Secrets Required + +| Secret | Where | Purpose | +|---|---|---| +| `HF_TOKEN` | GitHub + HF Space | Read HF dataset Β· Sync HF Space | + +Set in: +- GitHub: `Settings β†’ Secrets β†’ Actions β†’ New repository secret` +- HF Space: `Settings β†’ Repository secrets` + +--- + +## Deployment + +Push to `main` β†’ GitHub Actions (`sync.yml`) automatically syncs to HF Space. + +### Local development + +```bash +pip install -r requirements.txt +export HF_TOKEN=your_token +streamlit run app.py +``` + +--- -Edit `/src/streamlit_app.py` to customize this app to your heart's desire. :heart: +## Output UI -If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community -forums](https://discuss.streamlit.io). +1. **Data freshness warning** β€” alerts if prior NYSE trading day data is missing +2. **Next Trading Day Signal** β€” date + ETF from the winning approach +3. 
**Signal Conviction** β€” Z-score gauge + per-ETF probability bars +4. **Performance Metrics** β€” Annualised Return, Sharpe, Hit Ratio, Max DD +5. **Approach Comparison Table** β€” all three approaches side by side +6. **Equity Curves** β€” all three approaches + SPY + AGG benchmarks +7. **Audit Trail** β€” last 20 trading days for the winning approach diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py new file mode 100644 index 0000000000000000000000000000000000000000..eded65d240364585407db8613bb5c2acdbe1d7d9 --- /dev/null +++ 
b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py @@ -0,0 +1,273 @@ +""" +app.py +P2-ETF-CNN-LSTM-ALTERNATIVE-APPROACHES +Streamlit orchestrator β€” UI wiring only, no business logic here. +""" + +import os +import streamlit as st +import pandas as pd +import numpy as np + +# ── Module imports ──────────────────────────────────────────────────────────── +from data.loader import load_dataset, check_data_freshness, get_features_and_targets, dataset_summary +from utils.calendar import get_est_time, is_sync_window, get_next_signal_date +from models.base import build_sequences, train_val_test_split, scale_features, returns_to_labels +from models.approach1_wavelet import train_approach1, predict_approach1 +from models.approach2_regime import train_approach2, predict_approach2 +from models.approach3_multiscale import train_approach3, predict_approach3 +from strategy.backtest import execute_strategy, select_winner, build_comparison_table +from signals.conviction import compute_conviction +from ui.components import ( + show_freshness_status, show_signal_banner, show_conviction_panel, + show_metrics_row, show_comparison_table, show_audit_trail, +) +from ui.charts import equity_curve_chart, comparison_bar_chart + +# ── Page config ─────────────────────────────────────────────────────────────── +st.set_page_config( + page_title="P2-ETF-CNN-LSTM", + page_icon="🧠", + layout="wide", +) + +# ── Secrets 
─────────────────────────────────────────────────────────────────── +HF_TOKEN = os.getenv("HF_TOKEN", "") + +# ── Sidebar ─────────────────────────────────────────────────────────────────── +with st.sidebar: + st.header("βš™οΈ Configuration") + + now_est = get_est_time() + st.write(f"πŸ•’ **EST:** {now_est.strftime('%H:%M:%S')}") + if is_sync_window(): + st.success("βœ… Sync Window Active") + else: + st.info("⏸️ Sync Window Inactive") + + st.divider() + + start_yr = st.slider("πŸ“… Start Year", 2010, 2024, 2016) + fee_bps = st.slider("πŸ’° Fee (bps)", 0, 50, 10) + lookback = st.slider("πŸ“ Lookback (days)", 20, 60, 30, step=5) + epochs = st.number_input("πŸ” Max Epochs", 20, 300, 100, step=10) + + st.divider() + + split_option = st.selectbox("πŸ“Š Train/Val/Test Split", ["70/15/15", "80/10/10"], index=0) + split_map = {"70/15/15": (0.70, 0.15), "80/10/10": (0.80, 0.10)} + train_pct, val_pct = split_map[split_option] + + include_cash = st.checkbox("πŸ’΅ Include CASH class", value=True, + help="Model can select CASH (earns T-bill rate) as an alternative to any ETF") + + st.divider() + + run_button = st.button("πŸš€ Run All 3 Approaches", type="primary", use_container_width=True) + +# ── Title ───────────────────────────────────────────────────────────────────── +st.title("🧠 P2-ETF-CNN-LSTM") +st.caption("Approach 1: Wavelet Β· Approach 2: Regime-Conditioned Β· Approach 3: Multi-Scale Parallel") +st.caption("Winner selected by highest raw annualised return on out-of-sample test set.") + +# ── Load data (always, to check freshness) ──────────────────────────────────── +if not HF_TOKEN: + st.error("❌ HF_TOKEN secret not found. 
Please add it to your HF Space / GitHub secrets.") + st.stop() + +with st.spinner("πŸ“‘ Loading dataset from HuggingFace..."): + df = load_dataset(HF_TOKEN) + +if df.empty: + st.stop() + +# ── Freshness check ─────────────────────────────────────────────────────────── +freshness = check_data_freshness(df) +show_freshness_status(freshness) + +# ── Dataset summary in sidebar ──────────────────────────────────────────────── +with st.sidebar: + st.divider() + st.subheader("πŸ“¦ Dataset Info") + summary = dataset_summary(df) + if summary: + st.write(f"**Rows:** {summary['rows']:,}") + st.write(f"**Range:** {summary['start_date']} β†’ {summary['end_date']}") + st.write(f"**ETFs:** {', '.join([e.replace('_Ret','') for e in summary['etfs_found']])}") + st.write(f"**Benchmarks:** {', '.join([b.replace('_Ret','') for b in summary['benchmarks']])}") + st.write(f"**T-bill col:** {'βœ…' if summary['tbill_found'] else '❌'}") + +# ── Main execution ──────────────────────────────────────────────────────────── +if not run_button: + st.info("πŸ‘ˆ Configure parameters in the sidebar and click **πŸš€ Run All 3 Approaches** to begin.") + st.stop() + +# ── Filter by start year ────────────────────────────────────────────────────── +df = df[df.index.year >= start_yr].copy() +st.write(f"πŸ“… **Data:** {df.index[0].strftime('%Y-%m-%d')} β†’ {df.index[-1].strftime('%Y-%m-%d')} " + f"({df.index[-1].year - df.index[0].year + 1} years)") + +# ── Feature / target extraction ─────────────────────────────────────────────── +try: + input_features, target_etfs, tbill_rate = get_features_and_targets(df) +except ValueError as e: + st.error(str(e)) + st.stop() + +st.info(f"🎯 **Targets:** {len(target_etfs)} ETFs Β· **Features:** {len(input_features)} signals Β· " + f"**T-bill rate:** {tbill_rate*100:.2f}%") + +# ── Prepare sequences ───────────────────────────────────────────────────────── +X_raw = df[input_features].values.astype(np.float32) +y_raw = df[target_etfs].values.astype(np.float32) +n_etfs = 
len(target_etfs) +n_classes = n_etfs + (1 if include_cash else 0) # +1 for CASH + +# Fill NaNs with column means +col_means = np.nanmean(X_raw, axis=0) +for j in range(X_raw.shape[1]): + mask = np.isnan(X_raw[:, j]) + X_raw[mask, j] = col_means[j] + +X_seq, y_seq = build_sequences(X_raw, y_raw, lookback) +y_labels = returns_to_labels(y_seq, include_cash=include_cash) + +X_train, y_train_r, X_val, y_val_r, X_test, y_test_r = train_val_test_split(X_seq, y_seq, train_pct, val_pct) +_, y_train_l, _, y_val_l, _, y_test_l = train_val_test_split(X_seq, y_labels, train_pct, val_pct) + +X_train_s, X_val_s, X_test_s, _ = scale_features(X_train, X_val, X_test) + +train_size = len(X_train) +val_size = len(X_val) + +# Test dates (aligned with y_test) +test_start = lookback + train_size + val_size +test_dates = df.index[test_start: test_start + len(X_test)] +test_slice = slice(test_start, test_start + len(X_test)) + +st.success(f"βœ… Sequences β€” Train: {train_size} Β· Val: {val_size} Β· Test: {len(X_test)}") + +# ── Train all three approaches ──────────────────────────────────────────────── +results = {} +trained_info = {} # store extra info needed for conviction + +progress = st.progress(0, text="Starting training...") + +# ── Approach 1: Wavelet ─────────────────────────────────────────────────────── +with st.spinner("🌊 Training Approach 1 β€” Wavelet CNN-LSTM..."): + try: + model1, hist1, _ = train_approach1( + X_train_s, y_train_l, + X_val_s, y_val_l, + n_classes=n_classes, epochs=int(epochs), + ) + preds1, proba1 = predict_approach1(model1, X_test_s) + results["Approach 1"] = execute_strategy( + preds1, proba1, y_test_r, test_dates, target_etfs, fee_bps, tbill_rate, include_cash, + ) + trained_info["Approach 1"] = {"proba": proba1} + st.success("βœ… Approach 1 complete") + except Exception as e: + st.warning(f"⚠️ Approach 1 failed: {e}") + results["Approach 1"] = None + +progress.progress(33, text="Approach 1 done...") + +# ── Approach 2: Regime-Conditioned 
─────────────────────────────────────────── +with st.spinner("πŸ”€ Training Approach 2 β€” Regime-Conditioned CNN-LSTM..."): + try: + model2, hist2, hmm2, regime_cols2 = train_approach2( + X_train_s, y_train_l, + X_val_s, y_val_l, + X_flat_all=X_raw, + feature_names=input_features, + lookback=lookback, + train_size=train_size, + val_size=val_size, + n_classes=n_classes, epochs=int(epochs), + ) + preds2, proba2 = predict_approach2( + model2, X_test_s, X_raw, regime_cols2, hmm2, + lookback, train_size, val_size, + ) + results["Approach 2"] = execute_strategy( + preds2, proba2, y_test_r, test_dates, target_etfs, fee_bps, tbill_rate, include_cash, + ) + trained_info["Approach 2"] = {"proba": proba2} + st.success("βœ… Approach 2 complete") + except Exception as e: + st.warning(f"⚠️ Approach 2 failed: {e}") + results["Approach 2"] = None + +progress.progress(66, text="Approach 2 done...") + +# ── Approach 3: Multi-Scale ─────────────────────────────────────────────────── +with st.spinner("πŸ“‘ Training Approach 3 β€” Multi-Scale CNN-LSTM..."): + try: + model3, hist3 = train_approach3( + X_train_s, y_train_l, + X_val_s, y_val_l, + n_classes=n_classes, epochs=int(epochs), + ) + preds3, proba3 = predict_approach3(model3, X_test_s) + results["Approach 3"] = execute_strategy( + preds3, proba3, y_test_r, test_dates, target_etfs, fee_bps, tbill_rate, include_cash, + ) + trained_info["Approach 3"] = {"proba": proba3} + st.success("βœ… Approach 3 complete") + except Exception as e: + st.warning(f"⚠️ Approach 3 failed: {e}") + results["Approach 3"] = None + +progress.progress(100, text="All approaches complete!") +progress.empty() + +# ── Select winner ───────────────────────────────────────────────────────────── +winner_name = select_winner(results) +winner_res = results.get(winner_name) + +if winner_res is None: + st.error("❌ All approaches failed. 
Please check your data and configuration.") + st.stop() + +# ── Next trading date ───────────────────────────────────────────────────────── +next_date = get_next_signal_date() + +st.divider() + +# ── Signal banner (winner) ──────────────────────────────────────────────────── +show_signal_banner(winner_res["next_signal"], next_date, winner_name) + +# ── Conviction panel ────────────────────────────────────────────────────────── +winner_proba = trained_info[winner_name]["proba"] +conviction = compute_conviction(winner_proba[-1], target_etfs, include_cash) +show_conviction_panel(conviction) + +st.divider() + +# ── Winner metrics ──────────────────────────────────────────────────────────── +st.subheader(f"πŸ“Š {winner_name} β€” Performance Metrics") +show_metrics_row(winner_res, tbill_rate) + +st.divider() + +# ── Comparison table ────────────────────────────────────────────────────────── +st.subheader("πŸ† Approach Comparison (Winner = Highest Raw Annualised Return)") +comparison_df = build_comparison_table(results, winner_name) +show_comparison_table(comparison_df) + +# ── Comparison bar chart ────────────────────────────────────────────────────── +st.plotly_chart(comparison_bar_chart(results, winner_name), use_container_width=True) + +st.divider() + +# ── Equity curves ───────────────────────────────────────────────────────────── +st.subheader("πŸ“ˆ Out-of-Sample Equity Curves β€” All Approaches vs Benchmarks") +fig = equity_curve_chart(results, winner_name, test_dates, df, test_slice, tbill_rate) +st.plotly_chart(fig, use_container_width=True) + +st.divider() + +# ── Audit trail (winner) ────────────────────────────────────────────────────── +st.subheader(f"πŸ“‹ Audit Trail β€” {winner_name} (Last 20 Trading Days)") +show_audit_trail(winner_res["audit_trail"]) diff --git 
a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitattributes b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text 
+*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git 
a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/Dockerfile b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..5f51ead59f36f13043e036290df9440e25fe8cbe --- /dev/null +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/Dockerfile @@ -0,0 +1,20 @@ +FROM python:3.13.5-slim + +WORKDIR /app + +RUN apt-get 
update && apt-get install -y \ + build-essential \ + curl \ + git \ + && rm -rf /var/lib/apt/lists/* + +COPY requirements.txt ./ +COPY src/ ./src/ + +RUN pip3 install -r requirements.txt + +EXPOSE 8501 + +HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health + +ENTRYPOINT ["streamlit", "run", "src/streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0"] \ No newline at end of file diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md new file mode 100644 index 0000000000000000000000000000000000000000..db5c9c5e3605dc62b9c2835d44bba4f98c526886 --- /dev/null +++ 
b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md @@ -0,0 +1,19 @@ +--- +title: P2 ETF CNN LSTM ALTERNATIVE APPROACHES +emoji: πŸš€ +colorFrom: red +colorTo: red +sdk: docker +app_port: 8501 +tags: +- streamlit +pinned: false +short_description: Streamlit template space +--- + +# Welcome to Streamlit! + +Edit `/src/streamlit_app.py` to customize this app to your heart's desire. :heart: + +If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community +forums](https://discuss.streamlit.io). 
diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/requirements.txt b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..28d994e22f8dd432b51df193562052e315ad95f7 --- /dev/null +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/requirements.txt @@ -0,0 +1,3 @@ +altair +pandas +streamlit \ No 
newline at end of file diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/src/streamlit_app.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/src/streamlit_app.py new file mode 100644 index 0000000000000000000000000000000000000000..99d0b84662681e7d21a08fcce44908344fa86f80 --- /dev/null +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/src/streamlit_app.py @@ -0,0 +1,40 
@@ +import altair as alt +import numpy as np +import pandas as pd +import streamlit as st + +""" +# Welcome to Streamlit! + +Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:. +If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community +forums](https://discuss.streamlit.io). + +In the meantime, below is an example of what you can do with just a few lines of code: +""" + +num_points = st.slider("Number of points in spiral", 1, 10000, 1100) +num_turns = st.slider("Number of turns in spiral", 1, 300, 31) + +indices = np.linspace(0, 1, num_points) +theta = 2 * np.pi * num_turns * indices +radius = indices + +x = radius * np.cos(theta) +y = radius * np.sin(theta) + +df = pd.DataFrame({ + "x": x, + "y": y, + "idx": indices, + "rand": np.random.randn(num_points), +}) + +st.altair_chart(alt.Chart(df, height=700, width=700) + .mark_point(filled=True) + .encode( + x=alt.X("x", axis=None), + y=alt.Y("y", axis=None), + color=alt.Color("idx", legend=None, scale=alt.Scale()), + size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])), + )) \ No newline at end of file diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/requirements.txt 
b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/requirements.txt index 28d994e22f8dd432b51df193562052e315ad95f7..0b1bc9a5b544b19aaa1f70c7ab427d1c5be3f9b2 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/requirements.txt +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/requirements.txt @@ -1,3 +1,29 @@ -altair -pandas -streamlit \ No newline at end of file +# Core +streamlit>=1.32.0 
+pandas>=2.0.0 +numpy>=1.24.0 + +# Hugging Face +huggingface_hub>=0.21.0 +datasets>=2.18.0 + +# Machine Learning +tensorflow>=2.14.0 +scikit-learn>=1.3.0 +xgboost>=2.0.0 + +# Wavelet (Approach 1) +PyWavelets>=1.5.0 + +# Regime detection (Approach 2) +hmmlearn>=0.3.0 + +# Visualisation +plotly>=5.18.0 + +# NYSE Calendar +pandas_market_calendars>=4.3.0 +pytz>=2024.1 + +# Parquet +pyarrow>=14.0.0 diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/base.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/base.py new file mode 100644 index 0000000000000000000000000000000000000000..51a86329f292dd5bc931ddb615fb3fc76bf4a3fa --- /dev/null +++ 
b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/base.py @@ -0,0 +1,199 @@ +""" +models/base.py +Shared utilities for all three CNN-LSTM variants: + - Data preparation (sequences, train/val/test split) + - Common Keras layers / callbacks + - Predict + evaluate helpers +""" + +import numpy as np +import pandas as pd +from sklearn.preprocessing import RobustScaler +import tensorflow as tf +from tensorflow import keras + +# ── Reproducibility ─────────────────────────────────────────────────────────── +SEED = 42 +tf.random.set_seed(SEED) +np.random.seed(SEED) + + +# ── Sequence builder ────────────────────────────────────────────────────────── + +def build_sequences(features: np.ndarray, targets: np.ndarray, lookback: int): + """ + Build supervised sequences for CNN-LSTM input. 
+ + Args: + features : 2-D array [n_days, n_features] + targets : 2-D array [n_days, n_etfs] (raw returns) + lookback : number of past days per sample + + Returns: + X : [n_samples, lookback, n_features] + y : [n_samples, n_etfs] (raw returns for the next day) + """ + X, y = [], [] + for i in range(lookback, len(features)): + X.append(features[i - lookback: i]) + y.append(targets[i]) + return np.array(X, dtype=np.float32), np.array(y, dtype=np.float32) + + +# ── Train / val / test split ────────────────────────────────────────────────── + +def train_val_test_split(X, y, train_pct=0.70, val_pct=0.15): + """Split sequences into train / val / test preserving temporal order.""" + n = len(X) + t1 = int(n * train_pct) + t2 = int(n * (train_pct + val_pct)) + + return ( + X[:t1], y[:t1], + X[t1:t2], y[t1:t2], + X[t2:], y[t2:], + ) + + +# ── Feature scaling ─────────────────────────────────────────────────────────── + +def scale_features(X_train, X_val, X_test): + """ + Fit RobustScaler on training data only, apply to val and test. + Operates on the flattened feature dimension. + + Returns scaled arrays with same shape as inputs. + """ + n_train, lb, n_feat = X_train.shape + scaler = RobustScaler() + + # Fit on train + scaler.fit(X_train.reshape(-1, n_feat)) + + def _transform(X): + shape = X.shape + return scaler.transform(X.reshape(-1, n_feat)).reshape(shape) + + return _transform(X_train), _transform(X_val), _transform(X_test), scaler + + +# ── Label builder (classification: argmax of returns) ──────────────────────── + +def returns_to_labels(y_raw, include_cash=True, cash_threshold=0.0): + """ + Convert raw return matrix to integer class labels. + + If include_cash=True, adds a CASH class (index = n_etfs) when + the best ETF return is below cash_threshold. 
+ + Args: + y_raw : [n_samples, n_etfs] + include_cash : whether to allow CASH class + cash_threshold : minimum ETF return to prefer over CASH + + Returns: + labels : [n_samples] integer class indices + """ + best = np.argmax(y_raw, axis=1) + if include_cash: + best_return = y_raw[np.arange(len(y_raw)), best] + cash_idx = y_raw.shape[1] + labels = np.where(best_return < cash_threshold, cash_idx, best) + else: + labels = best + return labels.astype(np.int32) + + +# ── Common Keras callbacks ──────────────────────────────────────────────────── + +def get_callbacks(patience_es=15, patience_lr=8, min_lr=1e-6): + """Standard early stopping + reduce-LR callbacks shared by all models.""" + return [ + keras.callbacks.EarlyStopping( + monitor="val_loss", + patience=patience_es, + restore_best_weights=True, + verbose=0, + ), + keras.callbacks.ReduceLROnPlateau( + monitor="val_loss", + factor=0.5, + patience=patience_lr, + min_lr=min_lr, + verbose=0, + ), + ] + + +# ── Common output head ──────────────────────────────────────────────────────── + +def classification_head(x, n_classes: int, dropout: float = 0.3): + """ + Shared dense output head for all three CNN-LSTM variants. 
+ + Args: + x : input tensor + n_classes : number of ETF classes (+ 1 for CASH if applicable) + dropout : dropout rate + + Returns: + output tensor with softmax activation + """ + x = keras.layers.Dense(64, activation="relu")(x) + x = keras.layers.Dropout(dropout)(x) + x = keras.layers.Dense(n_classes, activation="softmax")(x) + return x + + +# ── Prediction helper ───────────────────────────────────────────────────────── + +def predict_classes(model, X_test: np.ndarray) -> np.ndarray: + """Return integer class predictions from a Keras model.""" + proba = model.predict(X_test, verbose=0) + return np.argmax(proba, axis=1), proba + + +# ── Metrics helper ──────────────────────────────────────────────────────────── + +def evaluate_returns( + preds: np.ndarray, + proba: np.ndarray, + y_raw_test: np.ndarray, + target_etfs: list, + tbill_rate: float, + fee_bps: int, + include_cash: bool = True, +): + """ + Given integer class predictions and raw return matrix, + compute strategy returns and summary metrics. 
+ + Returns: + strat_rets : np.ndarray of daily net returns + ann_return : annualised return (float) + cum_returns : cumulative return series + last_proba : probability vector for the last prediction + next_etf : name of ETF predicted for next session + """ + n_etfs = len(target_etfs) + strat_rets = [] + + for i, cls in enumerate(preds): + if include_cash and cls == n_etfs: + # CASH: earn daily T-bill rate + daily_tbill = tbill_rate / 252 + net = daily_tbill - (fee_bps / 10000) + else: + ret = y_raw_test[i][cls] + net = ret - (fee_bps / 10000) + strat_rets.append(net) + + strat_rets = np.array(strat_rets) + cum_returns = np.cumprod(1 + strat_rets) + ann_return = (cum_returns[-1] ** (252 / len(strat_rets))) - 1 + + last_proba = proba[-1] + next_cls = int(np.argmax(last_proba)) + next_etf = "CASH" if (include_cash and next_cls == n_etfs) else target_etfs[next_cls].replace("_Ret", "") + + return strat_rets, ann_return, cum_returns, last_proba, next_etf diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach1_wavelet.py 
b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach1_wavelet.py new file mode 100644 index 0000000000000000000000000000000000000000..8c946f7ebacbb83f1b50b6df2563c2aafd3af10e --- /dev/null +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach1_wavelet.py @@ -0,0 +1,167 @@ +""" +models/approach1_wavelet.py +Approach 1: Wavelet Decomposition CNN-LSTM + +Pipeline: + Raw macro signals + β†’ DWT (db4, level=3) per signal β†’ multi-band channel stack + β†’ 1D CNN (64 filters, k=3) β†’ MaxPool β†’ (32 filters, k=3) + β†’ LSTM (128 units) + β†’ Dense 64 β†’ Softmax (n_etfs + 1 CASH) +""" + +import numpy as np +import pywt +import tensorflow as tf +from tensorflow import keras +from models.base import classification_head, get_callbacks + +WAVELET = "db4" +LEVEL = 3 + + +# ── Wavelet feature engineering ─────────────────────────────────────────────── + +def _wavelet_decompose_signal(signal: np.ndarray, wavelet: str, level: int) -> np.ndarray: + """ + Decompose a 
1-D signal into DWT subbands and return them stacked. + + For a signal of length T: + coeffs = [cA_n, cD_n, cD_{n-1}, ..., cD_1] + We interpolate each subband back to length T so we can stack them. + + Returns: array of shape [T, level+1] + """ + T = len(signal) + coeffs = pywt.wavedec(signal, wavelet, level=level) + bands = [] + for c in coeffs: + # Interpolate back to original length + band = np.interp( + np.linspace(0, len(c) - 1, T), + np.arange(len(c)), + c, + ) + bands.append(band) + return np.stack(bands, axis=-1) # [T, level+1] + + +def apply_wavelet_transform(X: np.ndarray, wavelet: str = WAVELET, level: int = LEVEL) -> np.ndarray: + """ + Apply DWT to every feature channel across all samples. + + Args: + X : [n_samples, lookback, n_features] + + Returns: + X_wt : [n_samples, lookback, n_features * (level+1)] + """ + n_samples, lookback, n_features = X.shape + n_bands = level + 1 + X_wt = np.zeros((n_samples, lookback, n_features * n_bands), dtype=np.float32) + + for s in range(n_samples): + for f in range(n_features): + decomposed = _wavelet_decompose_signal(X[s, :, f], wavelet, level) # [T, n_bands] + start = f * n_bands + X_wt[s, :, start: start + n_bands] = decomposed + + return X_wt + + +# ── Model builder ───────────────────────────────────────────────────────────── + +def build_wavelet_cnn_lstm( + input_shape: tuple, + n_classes: int, + dropout: float = 0.3, + lstm_units: int = 128, +) -> keras.Model: + """ + Build Wavelet CNN-LSTM model. 
+ + Args: + input_shape : (lookback, n_features * n_bands) β€” post-DWT shape + n_classes : number of output classes (ETFs + CASH) + dropout : dropout rate + lstm_units : LSTM hidden size + + Returns: + Compiled Keras model + """ + inputs = keras.Input(shape=input_shape, name="wavelet_input") + + # CNN block 1 + x = keras.layers.Conv1D(64, kernel_size=3, padding="causal", activation="relu")(inputs) + x = keras.layers.BatchNormalization()(x) + x = keras.layers.MaxPooling1D(pool_size=2)(x) + + # CNN block 2 + x = keras.layers.Conv1D(32, kernel_size=3, padding="causal", activation="relu")(x) + x = keras.layers.BatchNormalization()(x) + x = keras.layers.Dropout(dropout)(x) + + # LSTM + x = keras.layers.LSTM(lstm_units, dropout=dropout, recurrent_dropout=0.1)(x) + + # Output head + outputs = classification_head(x, n_classes, dropout) + + model = keras.Model(inputs, outputs, name="Approach1_Wavelet_CNN_LSTM") + model.compile( + optimizer=keras.optimizers.Adam(learning_rate=1e-3), + loss="sparse_categorical_crossentropy", + metrics=["accuracy"], + ) + return model + + +# ── Full train pipeline ─────────────────────────────────────────────────────── + +def train_approach1( + X_train, y_train, + X_val, y_val, + n_classes: int, + epochs: int = 100, + batch_size: int = 32, + dropout: float = 0.3, + lstm_units: int = 128, +): + """ + Apply wavelet transform then train the CNN-LSTM. 
+ + Args: + X_train/val : [n, lookback, n_features] (scaled, pre-wavelet) + y_train/val : [n] integer class labels + n_classes : total output classes + + Returns: + model : trained Keras model + history : training history + wt_shape : post-DWT input shape (for inference) + """ + # Apply DWT + X_train_wt = apply_wavelet_transform(X_train) + X_val_wt = apply_wavelet_transform(X_val) + + input_shape = X_train_wt.shape[1:] # (lookback, n_features * n_bands) + model = build_wavelet_cnn_lstm(input_shape, n_classes, dropout, lstm_units) + + history = model.fit( + X_train_wt, y_train, + validation_data=(X_val_wt, y_val), + epochs=epochs, + batch_size=batch_size, + callbacks=get_callbacks(), + verbose=0, + ) + + return model, history, input_shape + + +def predict_approach1(model, X_test: np.ndarray) -> tuple: + """Apply DWT to test set then predict. Returns (class_preds, proba).""" + X_test_wt = apply_wavelet_transform(X_test) + proba = model.predict(X_test_wt, verbose=0) + preds = np.argmax(proba, axis=1) + return preds, proba diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach2_regime.py 
b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach2_regime.py new file mode 100644 index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc --- /dev/null +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach2_regime.py @@ -0,0 +1 @@ + diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach3_multiscale.py 
b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach3_multiscale.py new file mode 100644 index 0000000000000000000000000000000000000000..1b4e0821dacfec060ff2276feece20a75fc856cf --- /dev/null +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach3_multiscale.py @@ -0,0 +1,150 @@ +""" +models/approach3_multiscale.py +Approach 3: Multi-Scale Parallel CNN-LSTM + +Pipeline: + Raw macro signals + β†’ 3 parallel CNN towers: kernel 3 (short), 7 (medium), 21 (long) + β†’ Concatenate [96 features] + β†’ LSTM (128 units) + β†’ Dense 64 β†’ Softmax (n_etfs + 1 CASH) +""" + +import numpy as np +import tensorflow as tf +from tensorflow import keras +from models.base import classification_head, get_callbacks + +# Kernel sizes represent: momentum (3d), weekly cycle (7d), monthly trend (21d) +KERNEL_SIZES = [3, 7, 21] +FILTERS_EACH = 32 # 32 Γ— 3 towers = 96 concatenated features + + +# ── Model builder ───────────────────────────────────────────────────────────── + +def build_multiscale_cnn_lstm( + 
input_shape: tuple, + n_classes: int, + kernel_sizes: list = None, + filters: int = FILTERS_EACH, + dropout: float = 0.3, + lstm_units: int = 128, +) -> keras.Model: + """ + Multi-scale parallel CNN-LSTM. + + Three CNN towers with different kernel sizes run in parallel on the + same input, capturing momentum, weekly cycle, and monthly trend + simultaneously. Their outputs are concatenated before the LSTM. + + Args: + input_shape : (lookback, n_features) + n_classes : number of output classes (ETFs + CASH) + kernel_sizes : list of kernel sizes for each tower + filters : number of Conv1D filters per tower + dropout : dropout rate + lstm_units : LSTM hidden size + + Returns: + Compiled Keras model + """ + if kernel_sizes is None: + kernel_sizes = KERNEL_SIZES + + inputs = keras.Input(shape=input_shape, name="multiscale_input") + + towers = [] + for k in kernel_sizes: + # Each tower: Conv β†’ BN β†’ Conv β†’ BN β†’ GlobalAvgPool + t = keras.layers.Conv1D( + filters, kernel_size=k, padding="causal", activation="relu", + name=f"conv1_k{k}" + )(inputs) + t = keras.layers.BatchNormalization(name=f"bn1_k{k}")(t) + t = keras.layers.Conv1D( + filters, kernel_size=k, padding="causal", activation="relu", + name=f"conv2_k{k}" + )(t) + t = keras.layers.BatchNormalization(name=f"bn2_k{k}")(t) + t = keras.layers.Dropout(dropout, name=f"drop_k{k}")(t) + towers.append(t) + + # Concatenate along the feature dimension β€” keeps temporal axis intact for LSTM + if len(towers) > 1: + merged = keras.layers.Concatenate(axis=-1, name="tower_concat")(towers) + else: + merged = towers[0] + + # LSTM integrates multi-scale temporal features + x = keras.layers.LSTM(lstm_units, dropout=dropout, recurrent_dropout=0.1, name="lstm")(merged) + + # Output head + outputs = classification_head(x, n_classes, dropout) + + model = keras.Model(inputs, outputs, name="Approach3_MultiScale_CNN_LSTM") + model.compile( + optimizer=keras.optimizers.Adam(learning_rate=1e-3), + 
loss="sparse_categorical_crossentropy", + metrics=["accuracy"], + ) + return model + + +# ── Full train pipeline ─────────────────────────────────────────────────────── + +def train_approach3( + X_train, y_train, + X_val, y_val, + n_classes: int, + epochs: int = 100, + batch_size: int = 32, + dropout: float = 0.3, + lstm_units: int = 128, + kernel_sizes: list = None, +): + """ + Build and train the multi-scale CNN-LSTM. + + Args: + X_train/val : [n, lookback, n_features] + y_train/val : [n] integer class labels + n_classes : total output classes + + Returns: + model : trained Keras model + history : training history + """ + if kernel_sizes is None: + kernel_sizes = KERNEL_SIZES + + # Guard: lookback must be >= largest kernel + lookback = X_train.shape[1] + valid_kernels = [k for k in kernel_sizes if k <= lookback] + if not valid_kernels: + valid_kernels = [min(3, lookback)] + + model = build_multiscale_cnn_lstm( + input_shape=X_train.shape[1:], + n_classes=n_classes, + kernel_sizes=valid_kernels, + dropout=dropout, + lstm_units=lstm_units, + ) + + history = model.fit( + X_train, y_train, + validation_data=(X_val, y_val), + epochs=epochs, + batch_size=batch_size, + callbacks=get_callbacks(), + verbose=0, + ) + + return model, history + + +def predict_approach3(model, X_test: np.ndarray) -> tuple: + """Predict on test set. 
Returns (class_preds, proba).""" + proba = model.predict(X_test, verbose=0) + preds = np.argmax(proba, axis=1) + return preds, proba diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/strategy/backtest.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/strategy/backtest.py new file mode 100644 index 0000000000000000000000000000000000000000..0accf7a328637edb158806e7ce682fc66080cc50 --- /dev/null +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/strategy/backtest.py @@ -0,0 +1,193 @@ +""" +strategy/backtest.py +Strategy execution, performance metrics, and 
benchmark calculations. +Supports CASH as a class (earns T-bill rate when selected). +""" + +import numpy as np +import pandas as pd +from datetime import datetime + + +# ── Strategy execution ──────────────────────────────────────────────────────── + +def execute_strategy( + preds: np.ndarray, + proba: np.ndarray, + y_raw_test: np.ndarray, + test_dates: pd.DatetimeIndex, + target_etfs: list, + fee_bps: int, + tbill_rate: float, + include_cash: bool = True, +) -> dict: + """ + Execute strategy from model predictions. + + Args: + preds : [n] integer class predictions + proba : [n, n_classes] softmax probabilities + y_raw_test : [n, n_etfs] actual next-day ETF returns + test_dates : DatetimeIndex aligned with y_raw_test + target_etfs : list of ETF return column names e.g. ["TLT_Ret", ...] + fee_bps : transaction fee in basis points + tbill_rate : annualised 3m T-bill rate (e.g. 0.045) + include_cash: whether CASH is a valid class (index = n_etfs) + + Returns: + dict with keys: + strat_rets, cum_returns, ann_return, sharpe, + hit_ratio, max_dd, max_daily_dd, cum_max, + audit_trail, next_signal, next_proba + """ + n_etfs = len(target_etfs) + daily_tbill = tbill_rate / 252 + today = datetime.now().date() + + strat_rets = [] + audit_trail = [] + + for i, cls in enumerate(preds): + if include_cash and cls == n_etfs: + signal_etf = "CASH" + realized_ret = daily_tbill + else: + cls = min(cls, n_etfs - 1) + signal_etf = target_etfs[cls].replace("_Ret", "") + realized_ret = float(y_raw_test[i][cls]) + + net_ret = realized_ret - (fee_bps / 10000) + strat_rets.append(net_ret) + + trade_date = test_dates[i] + if trade_date.date() < today: + audit_trail.append({ + "Date": trade_date.strftime("%Y-%m-%d"), + "Signal": signal_etf, + "Realized": realized_ret, + "Net_Return": net_ret, + }) + + strat_rets = np.array(strat_rets, dtype=np.float64) + + # Next signal (last prediction) + last_cls = int(preds[-1]) + next_proba = proba[-1] + + if include_cash and last_cls == n_etfs: + 
next_signal = "CASH" + else: + last_cls = min(last_cls, n_etfs - 1) + next_signal = target_etfs[last_cls].replace("_Ret", "") + + metrics = _compute_metrics(strat_rets, tbill_rate) + + return { + **metrics, + "strat_rets": strat_rets, + "audit_trail": audit_trail, + "next_signal": next_signal, + "next_proba": next_proba, + } + + +# ── Performance metrics ─────────────────────────────────────────────────────── + +def _compute_metrics(strat_rets: np.ndarray, tbill_rate: float) -> dict: + if len(strat_rets) == 0: + return {} + + cum_returns = np.cumprod(1 + strat_rets) + n = len(strat_rets) + ann_return = float(cum_returns[-1] ** (252 / n) - 1) + + excess = strat_rets - tbill_rate / 252 + sharpe = float(np.mean(excess) / (np.std(strat_rets) + 1e-9) * np.sqrt(252)) + + recent = strat_rets[-15:] + hit_ratio = float(np.mean(recent > 0)) + + cum_max = np.maximum.accumulate(cum_returns) + drawdown = (cum_returns - cum_max) / cum_max + max_dd = float(np.min(drawdown)) + max_daily = float(np.min(strat_rets)) + + return { + "cum_returns": cum_returns, + "ann_return": ann_return, + "sharpe": sharpe, + "hit_ratio": hit_ratio, + "max_dd": max_dd, + "max_daily_dd":max_daily, + "cum_max": cum_max, + } + + +def compute_benchmark_metrics(returns: np.ndarray, tbill_rate: float) -> dict: + """Compute metrics for a benchmark return series.""" + return _compute_metrics(returns, tbill_rate) + + +# ── Winner selection ────────────────────────────────────────────────────────── + +def select_winner(results: dict) -> str: + """ + Given a dict of {approach_name: result_dict}, return the approach name + with the highest annualised return (raw, not risk-adjusted). 
+ + Args: + results : {"Approach 1": {...}, "Approach 2": {...}, "Approach 3": {...}} + + Returns: + winner_name : str + """ + best_name = None + best_return = -np.inf + + for name, res in results.items(): + if res is None: + continue + ret = res.get("ann_return", -np.inf) + if ret > best_return: + best_return = ret + best_name = name + + return best_name + + +# ── Comparison table ────────────────────────────────────────────────────────── + +def build_comparison_table(results: dict, winner_name: str) -> pd.DataFrame: + """ + Build a summary DataFrame comparing all three approaches. + + Args: + results : {name: result_dict} + winner_name : name of the winner + + Returns: + pd.DataFrame with one row per approach + """ + rows = [] + for name, res in results.items(): + if res is None: + rows.append({ + "Approach": name, + "Ann. Return": "N/A", + "Sharpe": "N/A", + "Hit Ratio (15d)":"N/A", + "Max Drawdown": "N/A", + "Winner": "", + }) + continue + + rows.append({ + "Approach": name, + "Ann. 
Return": f"{res['ann_return']*100:.2f}%", + "Sharpe": f"{res['sharpe']:.2f}", + "Hit Ratio (15d)": f"{res['hit_ratio']*100:.0f}%", + "Max Drawdown": f"{res['max_dd']*100:.2f}%", + "Winner": "⭐ WINNER" if name == winner_name else "", + }) + + return pd.DataFrame(rows) diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/signals/conviction.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/signals/conviction.py new file mode 100644 index 0000000000000000000000000000000000000000..1d0365c502e88cd0fe5f487ac2c1f902d3b1a813 --- /dev/null +++ 
b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/signals/conviction.py @@ -0,0 +1,93 @@ +""" +signals/conviction.py +Signal conviction scoring via Z-score of model probabilities. +""" + +import numpy as np + + +CONVICTION_THRESHOLDS = { + "Very High": 2.0, + "High": 1.0, + "Moderate": 0.0, + # Below 0.0 β†’ "Low" +} + + +def compute_conviction(proba: np.ndarray, target_etfs: list, include_cash: bool = True) -> dict: + """ + Compute Z-score conviction for the selected signal. + + Args: + proba : 1-D softmax probability vector [n_classes] + target_etfs : list of ETF return column names (e.g. 
["TLT_Ret", ...]) + include_cash: whether CASH is the last class + + Returns: + dict with keys: + best_idx : int + best_name : str (ETF ticker or "CASH") + z_score : float + label : str ("Very High" / "High" / "Moderate" / "Low") + scores : np.ndarray (raw proba) + etf_names : list of display names + sorted_pairs : list of (name, score) sorted highβ†’low + """ + scores = np.array(proba, dtype=float) + best_idx = int(np.argmax(scores)) + n_etfs = len(target_etfs) + + # Display names + etf_names = [e.replace("_Ret", "") for e in target_etfs] + if include_cash: + etf_names = etf_names + ["CASH"] + + best_name = etf_names[best_idx] if best_idx < len(etf_names) else "CASH" + + # Z-score + mean = np.mean(scores) + std = np.std(scores) + z = float((scores[best_idx] - mean) / std) if std > 1e-9 else 0.0 + + # Label + label = "Low" + for lbl, threshold in CONVICTION_THRESHOLDS.items(): + if z >= threshold: + label = lbl + break + + # Sorted pairs for UI bar chart + sorted_pairs = sorted( + zip(etf_names, scores), + key=lambda x: x[1], + reverse=True, + ) + + return { + "best_idx": best_idx, + "best_name": best_name, + "z_score": z, + "label": label, + "scores": scores, + "etf_names": etf_names, + "sorted_pairs": sorted_pairs, + } + + +def conviction_color(label: str) -> str: + """Return hex accent colour for a conviction label.""" + return { + "Very High": "#00b894", + "High": "#00cec9", + "Moderate": "#fdcb6e", + "Low": "#d63031", + }.get(label, "#888888") + + +def conviction_icon(label: str) -> str: + return { + "Very High": "🟒", + "High": "🟒", + "Moderate": "🟑", + "Low": "πŸ”΄", + }.get(label, "βšͺ") diff --git 
a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/ui/components.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/ui/components.py new file mode 100644 index 0000000000000000000000000000000000000000..059d2da8ec8c5a2be4040d1c1702d28f0029361e --- /dev/null +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/ui/components.py @@ -0,0 +1,229 @@ +""" +ui/components.py +Reusable Streamlit UI blocks: + - Freshness warning banner + - Next trading day signal banner + - Signal conviction panel + - Metrics row + - Audit trail table + - Comparison summary table +""" + +import streamlit as st +import pandas as pd +import numpy as np 
+ +from signals.conviction import conviction_color, conviction_icon + + +# ── Freshness warning ───────────────────────────────────────────────────────── + +def show_freshness_status(freshness: dict): + """Display data freshness status. Stops app if data is stale.""" + if freshness.get("fresh"): + st.success(freshness["message"]) + else: + st.warning(freshness["message"]) + + +# ── Next trading day banner ─────────────────────────────────────────────────── + +def show_signal_banner(next_signal: str, next_date, approach_name: str): + """Large coloured banner showing the winning approach's next signal.""" + is_cash = next_signal == "CASH" + bg = "linear-gradient(135deg, #2d3436 0%, #1a1a2e 100%)" if is_cash else \ + "linear-gradient(135deg, #00d1b2 0%, #00a896 100%)" + + st.markdown(f""" +
+
+ {approach_name.upper()} Β· NEXT TRADING DAY SIGNAL +
+

+ 🎯 {next_date.strftime('%Y-%m-%d')} β†’ {next_signal} +

+
+ """, unsafe_allow_html=True) + + +# ── Signal conviction panel ─────────────────────────────────────────────────── + +def show_conviction_panel(conviction: dict): + """ + White-background conviction panel with Z-score gauge and per-ETF bars. + Uses separate st.markdown calls per ETF row to avoid Streamlit HTML escaping. + """ + label = conviction["label"] + z_score = conviction["z_score"] + best_name = conviction["best_name"] + sorted_pairs = conviction["sorted_pairs"] + + color = conviction_color(label) + icon = conviction_icon(label) + + z_clipped = max(-3.0, min(3.0, z_score)) + bar_pct = int((z_clipped + 3) / 6 * 100) + + max_score = max(s for _, s in sorted_pairs) if sorted_pairs else 1.0 + if max_score <= 0: + max_score = 1.0 + + # ── Header + gauge ──────────────────────────────────────────────────────── + st.markdown(f""" +
+ +
+ {icon} + Signal Conviction + + Z = {z_score:.2f} σ + + + {label} + +
+ +
+ Weak −3σ + Neutral 0σ + Strong +3σ +
+
+
+
+
+ +
+ Model probability by ETF (ranked high → low): +
+
+ """, unsafe_allow_html=True) + + # ── Per-ETF rows ────────────────────────────────────────────────────────── + for i, (name, score) in enumerate(sorted_pairs): + is_winner = (name == best_name) + is_last = (i == len(sorted_pairs) - 1) + bar_w = int(score / max_score * 100) + name_style = "font-weight:700; color:#00897b;" if is_winner else "color:#444;" + bar_color = color if is_winner else "#b2dfdb" if score > max_score * 0.5 else "#e0e0e0" + star = " β˜…" if is_winner else "" + bottom_r = "0 0 12px 12px" if is_last else "0" + border_bot = "border-bottom:1px solid #f0f0f0;" if not is_last else "" + + st.markdown(f""" +
+
+ {name}{star} +
+
+
+ {score:.4f} +
+
+ """, unsafe_allow_html=True) + + st.caption( + "Z-score = std deviations the top ETF's probability sits above the mean of all ETF probabilities. " + "Higher β†’ model is more decisive." + ) + + +# ── Metrics row ─────────────────────────────────────────────────────────────── + +def show_metrics_row(result: dict, tbill_rate: float): + """Five-column metric display.""" + col1, col2, col3, col4, col5 = st.columns(5) + + col1.metric( + "πŸ“ˆ Annualised Return", + f"{result['ann_return']*100:.2f}%", + delta=f"vs T-bill: {(result['ann_return'] - tbill_rate)*100:.2f}%", + ) + col2.metric( + "πŸ“Š Sharpe Ratio", + f"{result['sharpe']:.2f}", + delta="Risk-Adjusted" if result['sharpe'] > 1 else "Below Threshold", + ) + col3.metric( + "🎯 Hit Ratio (15d)", + f"{result['hit_ratio']*100:.0f}%", + delta="Strong" if result['hit_ratio'] > 0.6 else "Weak", + ) + col4.metric( + "πŸ“‰ Max Drawdown", + f"{result['max_dd']*100:.2f}%", + delta="Peak to Trough", + ) + col5.metric( + "⚠️ Max Daily DD", + f"{result['max_daily_dd']*100:.2f}%", + delta="Worst Day", + ) + + +# ── Comparison table ────────────────────────────────────────────────────────── + +def show_comparison_table(comparison_df: pd.DataFrame): + """Styled comparison table for all three approaches.""" + def highlight_winner(row): + if "WINNER" in str(row.get("Winner", "")): + return ["background-color: rgba(0,200,150,0.15); font-weight:bold"] * len(row) + return [""] * len(row) + + styled = comparison_df.style.apply(highlight_winner, axis=1).set_properties(**{ + "text-align": "center", + "font-size": "14px", + }).set_table_styles([ + {"selector": "th", "props": [("font-size", "14px"), ("font-weight", "bold"), + ("text-align", "center")]}, + {"selector": "td", "props": [("padding", "10px")]}, + ]) + st.dataframe(styled, use_container_width=True) + + +# ── Audit trail ─────────────────────────────────────────────────────────────── + +def show_audit_trail(audit_trail: list): + """Last 20 days styled audit trail.""" + if not 
audit_trail: + st.info("No audit trail data available.") + return + + df = pd.DataFrame(audit_trail).tail(20)[["Date", "Signal", "Net_Return"]] + + def color_return(val): + return "color: #00c896; font-weight:bold" if val > 0 else "color: #ff4b4b; font-weight:bold" + + styled = df.style.applymap(color_return, subset=["Net_Return"]).format( + {"Net_Return": "{:.2%}"} + ).set_properties(**{ + "font-size": "16px", + "text-align": "center", + }).set_table_styles([ + {"selector": "th", "props": [("font-size", "16px"), ("font-weight", "bold"), + ("text-align", "center")]}, + {"selector": "td", "props": [("padding", "10px")]}, + ]) + st.dataframe(styled, use_container_width=True, height=500) diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/ui/charts.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/ui/charts.py new file mode 100644 index 0000000000000000000000000000000000000000..fc3b824bf2f3cbedd7b956d6969fad5e5ecca980 --- /dev/null +++ 
b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/ui/charts.py @@ -0,0 +1,144 @@ +""" +ui/charts.py +All Plotly chart builders for the Streamlit UI. +""" + +import numpy as np +import pandas as pd +import plotly.graph_objects as go + + +APPROACH_COLOURS = { + "Approach 1": "#00ffc8", + "Approach 2": "#7c6aff", + "Approach 3": "#ff6b6b", +} +BENCHMARK_COLOURS = { + "SPY": "#ff4b4b", + "AGG": "#ffa500", +} + + +def equity_curve_chart( + results: dict, + winner_name: str, + plot_dates: pd.DatetimeIndex, + df: pd.DataFrame, + test_slice: slice, + tbill_rate: float, +) -> go.Figure: + """ + Equity curve chart showing all three approaches + SPY + AGG benchmarks. 
+ + Args: + results : {approach_name: result_dict} + winner_name : highlighted approach + plot_dates : DatetimeIndex for x-axis + df : full DataFrame (for benchmark columns) + test_slice : slice object to extract test-period benchmark returns + tbill_rate : for benchmark metric calculation + """ + from strategy.backtest import compute_benchmark_metrics + + fig = go.Figure() + + # ── Strategy lines ──────────────────────────────────────────────────────── + for name, res in results.items(): + if res is None: + continue + colour = APPROACH_COLOURS.get(name, "#aaaaaa") + width = 3 if name == winner_name else 1.5 + dash = "solid" if name == winner_name else "dot" + + n = min(len(res["cum_returns"]), len(plot_dates)) + + fig.add_trace(go.Scatter( + x=plot_dates[:n], + y=res["cum_returns"][:n], + mode="lines", + name=f"{name} {'β˜…' if name == winner_name else ''}", + line=dict(color=colour, width=width, dash=dash), + fill="tozeroy" if name == winner_name else None, + fillcolor=f"rgba({_hex_to_rgb(colour)},0.07)" if name == winner_name else None, + )) + + # ── Benchmark: SPY ──────────────────────────────────────────────────────── + if "SPY_Ret" in df.columns: + spy_rets = df["SPY_Ret"].iloc[test_slice].values + n = min(len(spy_rets), len(plot_dates)) + spy_m = compute_benchmark_metrics(spy_rets[:n], tbill_rate) + fig.add_trace(go.Scatter( + x=plot_dates[:n], + y=spy_m["cum_returns"], + mode="lines", + name="SPY (Equity BM)", + line=dict(color=BENCHMARK_COLOURS["SPY"], width=1.5, dash="dot"), + )) + + # ── Benchmark: AGG ──────────────────────────────────────────────────────── + if "AGG_Ret" in df.columns: + agg_rets = df["AGG_Ret"].iloc[test_slice].values + n = min(len(agg_rets), len(plot_dates)) + agg_m = compute_benchmark_metrics(agg_rets[:n], tbill_rate) + fig.add_trace(go.Scatter( + x=plot_dates[:n], + y=agg_m["cum_returns"], + mode="lines", + name="AGG (Bond BM)", + line=dict(color=BENCHMARK_COLOURS["AGG"], width=1.5, dash="dot"), + )) + + fig.update_layout( + 
template="plotly_dark", + height=460, + hovermode="x unified", + showlegend=True, + legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01, font=dict(size=11)), + xaxis_title="Date", + yaxis_title="Cumulative Return (Γ—)", + margin=dict(l=50, r=30, t=20, b=50), + ) + return fig + + +def comparison_bar_chart(results: dict, winner_name: str) -> go.Figure: + """ + Horizontal bar chart comparing annualised returns across all three approaches. + """ + names = [] + returns = [] + colours = [] + + for name, res in results.items(): + if res is None: + continue + names.append(name) + returns.append(res["ann_return"] * 100) + colours.append(APPROACH_COLOURS.get(name, "#aaaaaa")) + + fig = go.Figure(go.Bar( + x=returns, + y=names, + orientation="h", + marker_color=colours, + text=[f"{r:.1f}%" for r in returns], + textposition="auto", + )) + + fig.update_layout( + template="plotly_dark", + height=200, + xaxis_title="Annualised Return (%)", + margin=dict(l=100, r=30, t=10, b=40), + showlegend=False, + ) + return fig + + +# ── Helper ──────────────────────────────────────────────────────────────────── + +def _hex_to_rgb(hex_color: str) -> str: + """Convert #rrggbb to 'r,g,b' string for rgba().""" + h = hex_color.lstrip("#") + r, g, b = int(h[0:2], 16), int(h[2:4], 16), int(h[4:6], 16) + return f"{r},{g},{b}" diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/utils/calendar.py 
b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/utils/calendar.py new file mode 100644 index 0000000000000000000000000000000000000000..08173038e3b64d22c91f1b3558b2e1aa9d30ac96 --- /dev/null +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/utils/calendar.py @@ -0,0 +1,91 @@ +""" +utils/calendar.py +NYSE calendar utilities: + - Next trading day for signal display + - Market open check + - EST time helper +""" + +from datetime import datetime, timedelta +import pytz + +try: + import pandas_market_calendars as mcal + NYSE_CAL_AVAILABLE = True +except ImportError: + NYSE_CAL_AVAILABLE = False + + +def get_est_time() -> datetime: + """Return current datetime in US/Eastern timezone.""" + return datetime.now(pytz.timezone("US/Eastern")) + + +def is_market_open_today() -> bool: + """Return True if today is a NYSE trading day.""" + today = get_est_time().date() + if NYSE_CAL_AVAILABLE: + try: + nyse = mcal.get_calendar("NYSE") + schedule = nyse.schedule(start_date=today, end_date=today) + return len(schedule) > 0 + except Exception: + pass + return today.weekday() < 5 + + +def get_next_signal_date() -> 
datetime.date: + """ + Determine the date for which the model's signal applies. + + Rules: + - If today is a NYSE trading day AND it is before 09:30 EST + β†’ signal applies to TODAY (market hasn't opened yet) + - Otherwise + β†’ signal applies to the NEXT NYSE trading day + """ + now_est = get_est_time() + today = now_est.date() + + market_not_open_yet = ( + now_est.hour < 9 or + (now_est.hour == 9 and now_est.minute < 30) + ) + + if NYSE_CAL_AVAILABLE: + try: + nyse = mcal.get_calendar("NYSE") + schedule = nyse.schedule( + start_date=today, + end_date=today + timedelta(days=10), + ) + if len(schedule) == 0: + return today # fallback + + first_day = schedule.index[0].date() + + # Today is a trading day and market hasn't opened β†’ today + if first_day == today and market_not_open_yet: + return today + + # Otherwise find first trading day strictly after today + for ts in schedule.index: + d = ts.date() + if d > today: + return d + + return schedule.index[-1].date() + except Exception: + pass + + # Fallback: simple weekend skip + candidate = today if market_not_open_yet else today + timedelta(days=1) + while candidate.weekday() >= 5: + candidate += timedelta(days=1) + return candidate + + +def is_sync_window() -> bool: + """True if current EST time is in the 07:00-08:00 or 19:00-20:00 window.""" + now = get_est_time() + return (7 <= now.hour < 8) or (19 <= now.hour < 20) diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/__init__.py 
b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc --- /dev/null +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/__init__.py @@ -0,0 +1 @@ + diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/signals/__init__.py 
b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/signals/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc --- /dev/null +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/signals/__init__.py @@ -0,0 +1 @@ + diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/strategy/__init__.py 
b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/strategy/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc --- /dev/null +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/strategy/__init__.py @@ -0,0 +1 @@ + diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/ui/__init__.py 
b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/ui/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc --- /dev/null +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/ui/__init__.py @@ -0,0 +1 @@ + diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/utils/__init__.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/utils/__init__.py new file mode 100644 
index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc --- /dev/null +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/utils/__init__.py @@ -0,0 +1 @@ + diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/models/__init__.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..3c159a665535afa10f8749a7c189adbf36f741fb --- /dev/null +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/models/__init__.py @@ -0,0 +1 @@ +# models package diff 
--git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/signals/__init__.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/signals/__init__.py index 8b137891791fe96927ad78e64b0aad7bded08bdc..e03276e53254a67de392ee21504954c3335cffbf 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/signals/__init__.py +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/signals/__init__.py @@ -1 +1 @@ - +# strategy package diff --git 
a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/strategy/__init__.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/strategy/__init__.py index 8b137891791fe96927ad78e64b0aad7bded08bdc..e03276e53254a67de392ee21504954c3335cffbf 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/strategy/__init__.py +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/strategy/__init__.py @@ -1 +1 @@ - +# strategy package diff --git 
a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/signals/__init__.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/signals/__init__.py index e03276e53254a67de392ee21504954c3335cffbf..756904e3de38acf0c95703618fce8211a8d9dfb1 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/signals/__init__.py +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/signals/__init__.py @@ -1 +1 @@ -# strategy package +# signals package diff --git 
a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/ui/__init__.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/ui/__init__.py index 8b137891791fe96927ad78e64b0aad7bded08bdc..67e55b5da6cef44a58a742147192cab1df55e857 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/ui/__init__.py +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/ui/__init__.py @@ -1 +1 @@ - +# ui package diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/utils/__init__.py 
b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/utils/__init__.py index 8b137891791fe96927ad78e64b0aad7bded08bdc..db3e3278dabf73020f4a939cd10bdded2bc377e8 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/utils/__init__.py +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/utils/__init__.py @@ -1 +1 @@ - +# utils package diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach2_regime.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach2_regime.py index 
8b137891791fe96927ad78e64b0aad7bded08bdc..f384e58a9a95cbbb880c5183ea6075a3eca14da0 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach2_regime.py +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach2_regime.py @@ -1 +1,217 @@ +""" +models/approach2_regime.py +Approach 2: Regime-Conditioned CNN-LSTM +Pipeline: + Raw macro signals + -> CNN Tower (64 filters, k=3) -> feature vector + -> Regime Classifier (HMM on VIX + HY spread + T10Y2Y) -> one-hot [4] + -> Concatenate CNN features + regime embedding + -> LSTM (128 units) + -> Dense 64 -> Softmax (n_etfs + 1 CASH) + +NOTE: tensorflow and hmmlearn are imported lazily inside functions +to prevent module-level import failures from making this module +appear broken to Python's import system. +""" + +import numpy as np + +N_REGIMES = 4 +REGIME_HINTS = ["VIX", "HY", "Spread", "T10Y2Y", "T10Y3M", "Credit"] + + +# --------------------------------------------------------------------------- +# Regime detection helpers +# --------------------------------------------------------------------------- + +def _get_regime_cols(feature_names: list) -> list: + return [ + f for f in feature_names + if any(hint.lower() in f.lower() for hint in REGIME_HINTS) + ] + + +def fit_regime_model(X_flat: np.ndarray, feature_names: list, + n_regimes: int = N_REGIMES): + """ + Fit a Gaussian HMM on regime-relevant macro features. 
+ Returns (hmm_model, regime_cols_idx). + hmm_model is None if hmmlearn is unavailable or fitting fails. + """ + regime_col_names = _get_regime_cols(feature_names) + if not regime_col_names: + regime_col_names = feature_names[:min(3, len(feature_names))] + + regime_cols_idx = [ + feature_names.index(c) for c in regime_col_names + if c in feature_names + ] + X_regime = X_flat[:, regime_cols_idx] + + try: + from hmmlearn.hmm import GaussianHMM + hmm = GaussianHMM( + n_components=n_regimes, + covariance_type="diag", + n_iter=100, + random_state=42, + ) + hmm.fit(X_regime) + return hmm, regime_cols_idx + except Exception as e: + print(f"[Approach 2] HMM fitting failed: {e}. Using fallback.") + return None, regime_cols_idx + + +def predict_regimes(hmm_model, X_flat: np.ndarray, + regime_cols_idx: list, + n_regimes: int = N_REGIMES) -> np.ndarray: + """Predict integer regime label for each day.""" + X_regime = X_flat[:, regime_cols_idx] + + if hmm_model is not None: + try: + return hmm_model.predict(X_regime) + except Exception: + pass + + # Fallback: quantile binning on first regime feature + feat = X_regime[:, 0] + quantiles = np.percentile(feat, np.linspace(0, 100, n_regimes + 1)) + return np.digitize(feat, quantiles[1:-1]).astype(int) + + +def regimes_to_onehot(regimes: np.ndarray, + n_regimes: int = N_REGIMES) -> np.ndarray: + one_hot = np.zeros((len(regimes), n_regimes), dtype=np.float32) + for i, r in enumerate(regimes): + one_hot[i, min(int(r), n_regimes - 1)] = 1.0 + return one_hot + + +def build_regime_sequences(X_seq: np.ndarray, + regimes_flat: np.ndarray, + lookback: int) -> np.ndarray: + n_samples = X_seq.shape[0] + aligned = regimes_flat[lookback: lookback + n_samples] + return regimes_to_onehot(aligned) + + +# --------------------------------------------------------------------------- +# Model builder +# --------------------------------------------------------------------------- + +def build_regime_cnn_lstm(seq_input_shape: tuple, + n_classes: int, + 
n_regimes: int = N_REGIMES, + dropout: float = 0.3, + lstm_units: int = 128): + """Build and compile the regime-conditioned CNN-LSTM model.""" + from tensorflow import keras + from models.base import classification_head + + seq_input = keras.Input(shape=seq_input_shape, name="seq_input") + x = keras.layers.Conv1D(64, kernel_size=3, padding="causal", + activation="relu")(seq_input) + x = keras.layers.BatchNormalization()(x) + x = keras.layers.MaxPooling1D(pool_size=2)(x) + x = keras.layers.Conv1D(32, kernel_size=3, padding="causal", + activation="relu")(x) + x = keras.layers.BatchNormalization()(x) + x = keras.layers.Dropout(dropout)(x) + cnn_out = keras.layers.GlobalAveragePooling1D()(x) + + regime_input = keras.Input(shape=(n_regimes,), name="regime_input") + regime_emb = keras.layers.Dense(8, activation="relu")(regime_input) + + merged = keras.layers.Concatenate()([cnn_out, regime_emb]) + x = keras.layers.Reshape((1, merged.shape[-1]))(merged) + x = keras.layers.LSTM(lstm_units, dropout=dropout)(x) + + outputs = classification_head(x, n_classes, dropout) + + model = keras.Model( + inputs=[seq_input, regime_input], + outputs=outputs, + name="Approach2_Regime_CNN_LSTM", + ) + model.compile( + optimizer=keras.optimizers.Adam(learning_rate=1e-3), + loss="sparse_categorical_crossentropy", + metrics=["accuracy"], + ) + return model + + +# --------------------------------------------------------------------------- +# Training pipeline +# --------------------------------------------------------------------------- + +def train_approach2( + X_train, y_train, + X_val, y_val, + X_flat_all: np.ndarray, + feature_names: list, + lookback: int, + train_size: int, + val_size: int, + n_classes: int, + epochs: int = 100, + batch_size: int = 32, + dropout: float = 0.3, + lstm_units: int = 128, +): + """ + Fit HMM regime model then train the regime-conditioned CNN-LSTM. 
+ Returns: model, history, hmm_model, regime_cols_idx + """ + from models.base import get_callbacks + + X_flat_train = X_flat_all[:train_size + lookback] + hmm_model, regime_cols_idx = fit_regime_model(X_flat_train, feature_names) + + regimes_all = predict_regimes(hmm_model, X_flat_all, regime_cols_idx) + + R_train = build_regime_sequences(X_train, regimes_all, lookback) + R_val = build_regime_sequences(X_val, regimes_all, lookback + train_size) + + model = build_regime_cnn_lstm( + X_train.shape[1:], n_classes, + dropout=dropout, lstm_units=lstm_units, + ) + + history = model.fit( + [X_train, R_train], y_train, + validation_data=([X_val, R_val], y_val), + epochs=epochs, + batch_size=batch_size, + callbacks=get_callbacks(), + verbose=0, + ) + + return model, history, hmm_model, regime_cols_idx + + +# --------------------------------------------------------------------------- +# Inference +# --------------------------------------------------------------------------- + +def predict_approach2( + model, + X_test: np.ndarray, + X_flat_all: np.ndarray, + regime_cols_idx: list, + hmm_model, + lookback: int, + train_size: int, + val_size: int, +) -> tuple: + """Predict on test set with regime conditioning. 
Returns (preds, proba).""" + regimes_all = predict_regimes(hmm_model, X_flat_all, regime_cols_idx) + offset = lookback + train_size + val_size + R_test = build_regime_sequences(X_test, regimes_all, offset) + + proba = model.predict([X_test, R_test], verbose=0) + preds = np.argmax(proba, axis=1) + return preds, proba diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/base.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/base.py index 51a86329f292dd5bc931ddb615fb3fc76bf4a3fa..b8f6d0445d0bdd6c7d364b1c63d52cc7c37f8143 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/base.py +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/base.py @@ -1,18 +1,16 @@ """ models/base.py -Shared utilities for all three CNN-LSTM variants: - - Data preparation (sequences, train/val/test split) - - Common Keras layers / callbacks - - Predict + evaluate helpers +Shared utilities for all three CNN-LSTM variants. +Key fix: class_weight support to prevent majority-class collapse. 
""" import numpy as np import pandas as pd from sklearn.preprocessing import RobustScaler +from sklearn.utils.class_weight import compute_class_weight import tensorflow as tf from tensorflow import keras -# ── Reproducibility ─────────────────────────────────────────────────────────── SEED = 42 tf.random.set_seed(SEED) np.random.seed(SEED) @@ -23,15 +21,7 @@ np.random.seed(SEED) def build_sequences(features: np.ndarray, targets: np.ndarray, lookback: int): """ Build supervised sequences for CNN-LSTM input. - - Args: - features : 2-D array [n_days, n_features] - targets : 2-D array [n_days, n_etfs] (raw returns) - lookback : number of past days per sample - - Returns: - X : [n_samples, lookback, n_features] - y : [n_samples, n_etfs] (raw returns for the next day) + X[i] = features[i : i+lookback] β†’ predicts y[i+lookback] """ X, y = [], [] for i in range(lookback, len(features)): @@ -43,11 +33,9 @@ def build_sequences(features: np.ndarray, targets: np.ndarray, lookback: int): # ── Train / val / test split ────────────────────────────────────────────────── def train_val_test_split(X, y, train_pct=0.70, val_pct=0.15): - """Split sequences into train / val / test preserving temporal order.""" - n = len(X) + n = len(X) t1 = int(n * train_pct) t2 = int(n * (train_pct + val_pct)) - return ( X[:t1], y[:t1], X[t1:t2], y[t1:t2], @@ -58,56 +46,66 @@ def train_val_test_split(X, y, train_pct=0.70, val_pct=0.15): # ── Feature scaling ─────────────────────────────────────────────────────────── def scale_features(X_train, X_val, X_test): - """ - Fit RobustScaler on training data only, apply to val and test. - Operates on the flattened feature dimension. - - Returns scaled arrays with same shape as inputs. 
- """ - n_train, lb, n_feat = X_train.shape - scaler = RobustScaler() - - # Fit on train + n_feat = X_train.shape[2] + scaler = RobustScaler() scaler.fit(X_train.reshape(-1, n_feat)) - def _transform(X): - shape = X.shape - return scaler.transform(X.reshape(-1, n_feat)).reshape(shape) + def _t(X): + s = X.shape + return scaler.transform(X.reshape(-1, n_feat)).reshape(s) - return _transform(X_train), _transform(X_val), _transform(X_test), scaler + return _t(X_train), _t(X_val), _t(X_test), scaler -# ── Label builder (classification: argmax of returns) ──────────────────────── +# ── Label builder ───────────────────────────────────────────────────────────── def returns_to_labels(y_raw, include_cash=True, cash_threshold=0.0): """ - Convert raw return matrix to integer class labels. - - If include_cash=True, adds a CASH class (index = n_etfs) when - the best ETF return is below cash_threshold. - - Args: - y_raw : [n_samples, n_etfs] - include_cash : whether to allow CASH class - cash_threshold : minimum ETF return to prefer over CASH - - Returns: - labels : [n_samples] integer class indices + Assign label = argmax(returns). + If include_cash and best return < cash_threshold β†’ label = n_etfs (CASH). """ - best = np.argmax(y_raw, axis=1) + best = np.argmax(y_raw, axis=1) if include_cash: - best_return = y_raw[np.arange(len(y_raw)), best] - cash_idx = y_raw.shape[1] - labels = np.where(best_return < cash_threshold, cash_idx, best) + best_ret = y_raw[np.arange(len(y_raw)), best] + cash_idx = y_raw.shape[1] + labels = np.where(best_ret < cash_threshold, cash_idx, best) else: labels = best return labels.astype(np.int32) -# ── Common Keras callbacks ──────────────────────────────────────────────────── +# ── Class weights ───────────────────────────────────────────────────────────── + +def compute_class_weights(y_labels: np.ndarray, n_classes: int) -> dict: + """ + Compute balanced class weights to counteract majority-class collapse. 
+ Returns dict {class_index: weight} for use in model.fit(). + """ + classes = np.arange(n_classes) + present = np.unique(y_labels) -def get_callbacks(patience_es=15, patience_lr=8, min_lr=1e-6): - """Standard early stopping + reduce-LR callbacks shared by all models.""" + try: + weights = compute_class_weight( + class_weight="balanced", + classes=present, + y=y_labels, + ) + weight_dict = {int(c): float(w) for c, w in zip(present, weights)} + except Exception: + weight_dict = {} + + # Fill any missing classes with weight 1.0 + for c in classes: + if c not in weight_dict: + weight_dict[c] = 1.0 + + return weight_dict + + +# ── Callbacks ───────────────────────────────────────────────────────────────── + +def get_callbacks(patience_es=20, patience_lr=10, min_lr=1e-6): + """Longer patience to allow models time to learn past majority class.""" return [ keras.callbacks.EarlyStopping( monitor="val_loss", @@ -125,75 +123,51 @@ def get_callbacks(patience_es=15, patience_lr=8, min_lr=1e-6): ] -# ── Common output head ──────────────────────────────────────────────────────── +# ── Output head ─────────────────────────────────────────────────────────────── def classification_head(x, n_classes: int, dropout: float = 0.3): - """ - Shared dense output head for all three CNN-LSTM variants. 
- - Args: - x : input tensor - n_classes : number of ETF classes (+ 1 for CASH if applicable) - dropout : dropout rate - - Returns: - output tensor with softmax activation - """ x = keras.layers.Dense(64, activation="relu")(x) + x = keras.layers.BatchNormalization()(x) x = keras.layers.Dropout(dropout)(x) + x = keras.layers.Dense(32, activation="relu")(x) + x = keras.layers.Dropout(dropout / 2)(x) x = keras.layers.Dense(n_classes, activation="softmax")(x) return x -# ── Prediction helper ───────────────────────────────────────────────────────── +# ── Prediction ──────────────────────────────────────────────────────────────── -def predict_classes(model, X_test: np.ndarray) -> np.ndarray: - """Return integer class predictions from a Keras model.""" +def predict_classes(model, X_test: np.ndarray) -> tuple: proba = model.predict(X_test, verbose=0) return np.argmax(proba, axis=1), proba -# ── Metrics helper ──────────────────────────────────────────────────────────── +# ── Metrics ─────────────────────────────────────────────────────────────────── def evaluate_returns( - preds: np.ndarray, - proba: np.ndarray, - y_raw_test: np.ndarray, - target_etfs: list, - tbill_rate: float, - fee_bps: int, - include_cash: bool = True, + preds, proba, y_raw_test, target_etfs, tbill_rate, fee_bps, include_cash=True, ): - """ - Given integer class predictions and raw return matrix, - compute strategy returns and summary metrics. 
- - Returns: - strat_rets : np.ndarray of daily net returns - ann_return : annualised return (float) - cum_returns : cumulative return series - last_proba : probability vector for the last prediction - next_etf : name of ETF predicted for next session - """ n_etfs = len(target_etfs) - strat_rets = [] + daily_tbill = tbill_rate / 252 + strat_rets = [] for i, cls in enumerate(preds): if include_cash and cls == n_etfs: - # CASH: earn daily T-bill rate - daily_tbill = tbill_rate / 252 - net = daily_tbill - (fee_bps / 10000) + net = daily_tbill - fee_bps / 10000 else: - ret = y_raw_test[i][cls] - net = ret - (fee_bps / 10000) + cls = min(int(cls), n_etfs - 1) + net = float(y_raw_test[i][cls]) - fee_bps / 10000 strat_rets.append(net) strat_rets = np.array(strat_rets) cum_returns = np.cumprod(1 + strat_rets) - ann_return = (cum_returns[-1] ** (252 / len(strat_rets))) - 1 + ann_return = cum_returns[-1] ** (252 / len(strat_rets)) - 1 last_proba = proba[-1] next_cls = int(np.argmax(last_proba)) - next_etf = "CASH" if (include_cash and next_cls == n_etfs) else target_etfs[next_cls].replace("_Ret", "") + next_etf = ( + "CASH" if (include_cash and next_cls == n_etfs) + else target_etfs[min(next_cls, n_etfs - 1)].replace("_Ret", "") + ) return strat_rets, ann_return, cum_returns, last_proba, next_etf diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach1_wavelet.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach1_wavelet.py index 8c946f7ebacbb83f1b50b6df2563c2aafd3af10e..64a88a2b63192fbb52940b88495eff46f2e54006 100644 
--- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach1_wavelet.py +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach1_wavelet.py @@ -1,167 +1,86 @@ """ models/approach1_wavelet.py Approach 1: Wavelet Decomposition CNN-LSTM - -Pipeline: - Raw macro signals - β†’ DWT (db4, level=3) per signal β†’ multi-band channel stack - β†’ 1D CNN (64 filters, k=3) β†’ MaxPool β†’ (32 filters, k=3) - β†’ LSTM (128 units) - β†’ Dense 64 β†’ Softmax (n_etfs + 1 CASH) +With class weights to prevent majority-class collapse. """ import numpy as np import pywt -import tensorflow as tf -from tensorflow import keras -from models.base import classification_head, get_callbacks - -WAVELET = "db4" -LEVEL = 3 +WAVELET = "db4" +LEVEL = 3 -# ── Wavelet feature engineering ─────────────────────────────────────────────── def _wavelet_decompose_signal(signal: np.ndarray, wavelet: str, level: int) -> np.ndarray: - """ - Decompose a 1-D signal into DWT subbands and return them stacked. - - For a signal of length T: - coeffs = [cA_n, cD_n, cD_{n-1}, ..., cD_1] - We interpolate each subband back to length T so we can stack them. 
- - Returns: array of shape [T, level+1] - """ T = len(signal) coeffs = pywt.wavedec(signal, wavelet, level=level) bands = [] for c in coeffs: - # Interpolate back to original length - band = np.interp( - np.linspace(0, len(c) - 1, T), - np.arange(len(c)), - c, - ) + band = np.interp(np.linspace(0, len(c)-1, T), np.arange(len(c)), c) bands.append(band) - return np.stack(bands, axis=-1) # [T, level+1] - + return np.stack(bands, axis=-1) -def apply_wavelet_transform(X: np.ndarray, wavelet: str = WAVELET, level: int = LEVEL) -> np.ndarray: - """ - Apply DWT to every feature channel across all samples. - Args: - X : [n_samples, lookback, n_features] - - Returns: - X_wt : [n_samples, lookback, n_features * (level+1)] - """ +def apply_wavelet_transform(X: np.ndarray, wavelet=WAVELET, level=LEVEL) -> np.ndarray: n_samples, lookback, n_features = X.shape - n_bands = level + 1 - X_wt = np.zeros((n_samples, lookback, n_features * n_bands), dtype=np.float32) - + n_bands = level + 1 + X_wt = np.zeros((n_samples, lookback, n_features * n_bands), dtype=np.float32) for s in range(n_samples): for f in range(n_features): - decomposed = _wavelet_decompose_signal(X[s, :, f], wavelet, level) # [T, n_bands] + decomposed = _wavelet_decompose_signal(X[s, :, f], wavelet, level) start = f * n_bands X_wt[s, :, start: start + n_bands] = decomposed - return X_wt -# ── Model builder ───────────────────────────────────────────────────────────── - -def build_wavelet_cnn_lstm( - input_shape: tuple, - n_classes: int, - dropout: float = 0.3, - lstm_units: int = 128, -) -> keras.Model: - """ - Build Wavelet CNN-LSTM model. 
+def build_wavelet_cnn_lstm(input_shape, n_classes, dropout=0.3, lstm_units=128): + from tensorflow import keras + from models.base import classification_head - Args: - input_shape : (lookback, n_features * n_bands) β€” post-DWT shape - n_classes : number of output classes (ETFs + CASH) - dropout : dropout rate - lstm_units : LSTM hidden size - - Returns: - Compiled Keras model - """ - inputs = keras.Input(shape=input_shape, name="wavelet_input") - - # CNN block 1 - x = keras.layers.Conv1D(64, kernel_size=3, padding="causal", activation="relu")(inputs) + inputs = keras.Input(shape=input_shape) + x = keras.layers.Conv1D(64, 3, padding="causal", activation="relu")(inputs) x = keras.layers.BatchNormalization()(x) - x = keras.layers.MaxPooling1D(pool_size=2)(x) - - # CNN block 2 - x = keras.layers.Conv1D(32, kernel_size=3, padding="causal", activation="relu")(x) + x = keras.layers.MaxPooling1D(2)(x) + x = keras.layers.Conv1D(32, 3, padding="causal", activation="relu")(x) x = keras.layers.BatchNormalization()(x) x = keras.layers.Dropout(dropout)(x) - - # LSTM x = keras.layers.LSTM(lstm_units, dropout=dropout, recurrent_dropout=0.1)(x) - - # Output head outputs = classification_head(x, n_classes, dropout) - model = keras.Model(inputs, outputs, name="Approach1_Wavelet_CNN_LSTM") + model = keras.Model(inputs, outputs, name="Approach1_Wavelet") model.compile( - optimizer=keras.optimizers.Adam(learning_rate=1e-3), + optimizer=keras.optimizers.Adam(1e-3), loss="sparse_categorical_crossentropy", metrics=["accuracy"], ) return model -# ── Full train pipeline ─────────────────────────────────────────────────────── - def train_approach1( - X_train, y_train, - X_val, y_val, - n_classes: int, - epochs: int = 100, - batch_size: int = 32, - dropout: float = 0.3, - lstm_units: int = 128, + X_train, y_train, X_val, y_val, + n_classes, epochs=100, batch_size=32, dropout=0.3, lstm_units=128, ): - """ - Apply wavelet transform then train the CNN-LSTM. 
- - Args: - X_train/val : [n, lookback, n_features] (scaled, pre-wavelet) - y_train/val : [n] integer class labels - n_classes : total output classes - - Returns: - model : trained Keras model - history : training history - wt_shape : post-DWT input shape (for inference) - """ - # Apply DWT - X_train_wt = apply_wavelet_transform(X_train) - X_val_wt = apply_wavelet_transform(X_val) - - input_shape = X_train_wt.shape[1:] # (lookback, n_features * n_bands) + from models.base import get_callbacks, compute_class_weights + + X_train_wt = apply_wavelet_transform(X_train) + X_val_wt = apply_wavelet_transform(X_val) + input_shape = X_train_wt.shape[1:] model = build_wavelet_cnn_lstm(input_shape, n_classes, dropout, lstm_units) + cw = compute_class_weights(y_train, n_classes) history = model.fit( X_train_wt, y_train, validation_data=(X_val_wt, y_val), epochs=epochs, batch_size=batch_size, + class_weight=cw, callbacks=get_callbacks(), verbose=0, ) - return model, history, input_shape def predict_approach1(model, X_test: np.ndarray) -> tuple: - """Apply DWT to test set then predict. 
Returns (class_preds, proba).""" X_test_wt = apply_wavelet_transform(X_test) proba = model.predict(X_test_wt, verbose=0) - preds = np.argmax(proba, axis=1) - return preds, proba + return np.argmax(proba, axis=1), proba diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach2_regime.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach2_regime.py index f384e58a9a95cbbb880c5183ea6075a3eca14da0..876b6d1f75c7b37a76b8464349043a3236df07be 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach2_regime.py +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach2_regime.py @@ -166,7 +166,7 @@ def train_approach2( Fit HMM regime model then train the regime-conditioned CNN-LSTM. 
Returns: model, history, hmm_model, regime_cols_idx """ - from models.base import get_callbacks + from models.base import get_callbacks, compute_class_weights X_flat_train = X_flat_all[:train_size + lookback] hmm_model, regime_cols_idx = fit_regime_model(X_flat_train, feature_names) @@ -181,11 +181,14 @@ def train_approach2( dropout=dropout, lstm_units=lstm_units, ) + cw = compute_class_weights(y_train, n_classes) + history = model.fit( [X_train, R_train], y_train, validation_data=([X_val, R_val], y_val), epochs=epochs, batch_size=batch_size, + class_weight=cw, callbacks=get_callbacks(), verbose=0, ) diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach3_multiscale.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach3_multiscale.py index 1b4e0821dacfec060ff2276feece20a75fc856cf..09014aae1de9efef3f00618cfdac27addc7aed4c 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach3_multiscale.py +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach3_multiscale.py @@ -1,150 +1,80 @@ """ models/approach3_multiscale.py Approach 3: Multi-Scale Parallel CNN-LSTM - -Pipeline: - Raw macro signals - β†’ 3 parallel CNN towers: kernel 3 
(short), 7 (medium), 21 (long) - β†’ Concatenate [96 features] - β†’ LSTM (128 units) - β†’ Dense 64 β†’ Softmax (n_etfs + 1 CASH) +With class weights to prevent majority-class collapse. """ import numpy as np -import tensorflow as tf -from tensorflow import keras -from models.base import classification_head, get_callbacks - -# Kernel sizes represent: momentum (3d), weekly cycle (7d), monthly trend (21d) -KERNEL_SIZES = [3, 7, 21] -FILTERS_EACH = 32 # 32 Γ— 3 towers = 96 concatenated features +KERNEL_SIZES = [3, 7, 21] +FILTERS_EACH = 32 -# ── Model builder ───────────────────────────────────────────────────────────── def build_multiscale_cnn_lstm( - input_shape: tuple, - n_classes: int, - kernel_sizes: list = None, - filters: int = FILTERS_EACH, - dropout: float = 0.3, - lstm_units: int = 128, -) -> keras.Model: - """ - Multi-scale parallel CNN-LSTM. - - Three CNN towers with different kernel sizes run in parallel on the - same input, capturing momentum, weekly cycle, and monthly trend - simultaneously. Their outputs are concatenated before the LSTM. 
- - Args: - input_shape : (lookback, n_features) - n_classes : number of output classes (ETFs + CASH) - kernel_sizes : list of kernel sizes for each tower - filters : number of Conv1D filters per tower - dropout : dropout rate - lstm_units : LSTM hidden size - - Returns: - Compiled Keras model - """ + input_shape, n_classes, kernel_sizes=None, + filters=FILTERS_EACH, dropout=0.3, lstm_units=128, +): + from tensorflow import keras + from models.base import classification_head + if kernel_sizes is None: kernel_sizes = KERNEL_SIZES - inputs = keras.Input(shape=input_shape, name="multiscale_input") + inputs = keras.Input(shape=input_shape, name="multiscale_input") + towers = [] - towers = [] for k in kernel_sizes: - # Each tower: Conv β†’ BN β†’ Conv β†’ BN β†’ GlobalAvgPool - t = keras.layers.Conv1D( - filters, kernel_size=k, padding="causal", activation="relu", - name=f"conv1_k{k}" - )(inputs) + t = keras.layers.Conv1D(filters, k, padding="causal", activation="relu", + name=f"conv1_k{k}")(inputs) t = keras.layers.BatchNormalization(name=f"bn1_k{k}")(t) - t = keras.layers.Conv1D( - filters, kernel_size=k, padding="causal", activation="relu", - name=f"conv2_k{k}" - )(t) + t = keras.layers.Conv1D(filters, k, padding="causal", activation="relu", + name=f"conv2_k{k}")(t) t = keras.layers.BatchNormalization(name=f"bn2_k{k}")(t) t = keras.layers.Dropout(dropout, name=f"drop_k{k}")(t) towers.append(t) - # Concatenate along the feature dimension β€” keeps temporal axis intact for LSTM - if len(towers) > 1: - merged = keras.layers.Concatenate(axis=-1, name="tower_concat")(towers) - else: - merged = towers[0] - - # LSTM integrates multi-scale temporal features - x = keras.layers.LSTM(lstm_units, dropout=dropout, recurrent_dropout=0.1, name="lstm")(merged) - - # Output head + merged = keras.layers.Concatenate(axis=-1)(towers) if len(towers) > 1 else towers[0] + x = keras.layers.LSTM(lstm_units, dropout=dropout, recurrent_dropout=0.1)(merged) outputs = classification_head(x, 
n_classes, dropout) - model = keras.Model(inputs, outputs, name="Approach3_MultiScale_CNN_LSTM") + model = keras.Model(inputs, outputs, name="Approach3_MultiScale") model.compile( - optimizer=keras.optimizers.Adam(learning_rate=1e-3), + optimizer=keras.optimizers.Adam(1e-3), loss="sparse_categorical_crossentropy", metrics=["accuracy"], ) return model -# ── Full train pipeline ─────────────────────────────────────────────────────── - def train_approach3( - X_train, y_train, - X_val, y_val, - n_classes: int, - epochs: int = 100, - batch_size: int = 32, - dropout: float = 0.3, - lstm_units: int = 128, - kernel_sizes: list = None, + X_train, y_train, X_val, y_val, + n_classes, epochs=100, batch_size=32, + dropout=0.3, lstm_units=128, kernel_sizes=None, ): - """ - Build and train the multi-scale CNN-LSTM. - - Args: - X_train/val : [n, lookback, n_features] - y_train/val : [n] integer class labels - n_classes : total output classes - - Returns: - model : trained Keras model - history : training history - """ + from models.base import get_callbacks, compute_class_weights + if kernel_sizes is None: kernel_sizes = KERNEL_SIZES - # Guard: lookback must be >= largest kernel - lookback = X_train.shape[1] - valid_kernels = [k for k in kernel_sizes if k <= lookback] - if not valid_kernels: - valid_kernels = [min(3, lookback)] - - model = build_multiscale_cnn_lstm( - input_shape=X_train.shape[1:], - n_classes=n_classes, - kernel_sizes=valid_kernels, - dropout=dropout, - lstm_units=lstm_units, + lookback = X_train.shape[1] + valid_kernels = [k for k in kernel_sizes if k <= lookback] or [min(3, lookback)] + model = build_multiscale_cnn_lstm( + X_train.shape[1:], n_classes, valid_kernels, dropout=dropout, lstm_units=lstm_units, ) + cw = compute_class_weights(y_train, n_classes) history = model.fit( X_train, y_train, validation_data=(X_val, y_val), epochs=epochs, batch_size=batch_size, + class_weight=cw, callbacks=get_callbacks(), verbose=0, ) - return model, history def 
predict_approach3(model, X_test: np.ndarray) -> tuple: - """Predict on test set. Returns (class_preds, proba).""" proba = model.predict(X_test, verbose=0) - preds = np.argmax(proba, axis=1) - return preds, proba + return np.argmax(proba, axis=1), proba diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/ui/components.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/ui/components.py index 059d2da8ec8c5a2be4040d1c1702d28f0029361e..29f5de28e77d47460b82b0c1dad7249a005d6cee 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/ui/components.py +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/ui/components.py @@ -227,3 +227,46 @@ def show_audit_trail(audit_trail: list): {"selector": "td", "props": [("padding", "10px")]}, ]) st.dataframe(styled, use_container_width=True, height=500) + + +# ── All models' next day signals panel ─────────────────────────────────────── + +def show_all_signals_panel(all_signals: dict, target_etfs: list, include_cash: bool, next_date): + """ + Compact panel showing what each model predicts for next trading day, + with top probability displayed. 
+ """ + APPROACH_COLORS = { + "Approach 1": "#00ffc8", + "Approach 2": "#7c6aff", + "Approach 3": "#ff6b6b", + } + + st.subheader(f"πŸ—“οΈ All Models β€” {next_date.strftime('%Y-%m-%d')} Signals") + + cols = st.columns(len(all_signals)) + for col, (name, info) in zip(cols, all_signals.items()): + color = APPROACH_COLORS.get(name, "#888888") + signal = info["signal"] + proba = info["proba"] + top_prob = float(np.max(proba)) * 100 + is_winner = info["is_winner"] + border = f"3px solid {color}" + badge = " ⭐ WINNER" if is_winner else "" + + col.markdown(f""" +
+
+ {name.upper()}{badge} +
+
+ {signal} +
+
+ Top prob: {top_prob:.1f}% +
+
+ """, unsafe_allow_html=True) diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/strategy/backtest.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/strategy/backtest.py index 0accf7a328637edb158806e7ce682fc66080cc50..12944a14a29289e3a47eaadf425bd64c777199b7 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/strategy/backtest.py +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/strategy/backtest.py @@ -55,6 +55,8 @@ def execute_strategy( cls = min(cls, n_etfs - 1) signal_etf = target_etfs[cls].replace("_Ret", "") realized_ret = float(y_raw_test[i][cls]) + # Sanity clip: daily returns should never exceed Β±50% + realized_ret = max(-0.50, min(0.50, realized_ret)) net_ret = realized_ret - (fee_bps / 10000) strat_rets.append(net_ret) diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/ui/components.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/ui/components.py index 29f5de28e77d47460b82b0c1dad7249a005d6cee..48afd9682daacbe9ce9d72e86fd2c0cc5c8292cf 100644 --- 
a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/ui/components.py +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/ui/components.py @@ -1,43 +1,35 @@ """ ui/components.py -Reusable Streamlit UI blocks: - - Freshness warning banner - - Next trading day signal banner - - Signal conviction panel - - Metrics row - - Audit trail table - - Comparison summary table +Reusable Streamlit UI blocks. +- Fixed applymap β†’ map deprecation +- Removed debug expanders +- Added show_all_signals_panel """ import streamlit as st import pandas as pd import numpy as np - from signals.conviction import conviction_color, conviction_icon -# ── Freshness warning ───────────────────────────────────────────────────────── +# ── Freshness status ────────────────────────────────────────────────────────── def show_freshness_status(freshness: dict): - """Display data freshness status. Stops app if data is stale.""" if freshness.get("fresh"): st.success(freshness["message"]) else: st.warning(freshness["message"]) -# ── Next trading day banner ─────────────────────────────────────────────────── +# ── Winner signal banner ────────────────────────────────────────────────────── def show_signal_banner(next_signal: str, next_date, approach_name: str): - """Large coloured banner showing the winning approach's next signal.""" is_cash = next_signal == "CASH" - bg = "linear-gradient(135deg, #2d3436 0%, #1a1a2e 100%)" if is_cash else \ - "linear-gradient(135deg, #00d1b2 0%, #00a896 100%)" - + bg = ("linear-gradient(135deg, #2d3436 0%, #1a1a2e 100%)" if is_cash + else "linear-gradient(135deg, #00d1b2 0%, #00a896 100%)") st.markdown(f"""
+ text-align:center; box-shadow:0 8px 16px rgba(0,0,0,0.3); margin:16px 0;">
{approach_name.upper()} Β· NEXT TRADING DAY SIGNAL @@ -50,78 +42,98 @@ def show_signal_banner(next_signal: str, next_date, approach_name: str): """, unsafe_allow_html=True) +# ── All models signals panel ────────────────────────────────────────────────── + +def show_all_signals_panel(all_signals: dict, target_etfs: list, + include_cash: bool, next_date, optimal_lookback: int): + APPROACH_COLORS = { + "Approach 1": "#00ffc8", + "Approach 2": "#7c6aff", + "Approach 3": "#ff6b6b", + } + + st.subheader(f"πŸ—“οΈ All Models β€” {next_date.strftime('%Y-%m-%d')} Signals") + st.caption(f"πŸ“ Optimal lookback: **{optimal_lookback}d** (auto-selected from 30/45/60)") + + cols = st.columns(len(all_signals)) + for col, (name, info) in zip(cols, all_signals.items()): + color = APPROACH_COLORS.get(name, "#888888") + signal = info["signal"] + proba = info["proba"] + top_prob = float(np.max(proba)) * 100 + is_winner = info["is_winner"] + badge = " ⭐" if is_winner else "" + + col.markdown(f""" +
+
+ {name.upper()}{badge} +
+
+ {signal} +
+
+ Confidence: {top_prob:.1f}% +
+
+ """, unsafe_allow_html=True) + + # ── Signal conviction panel ─────────────────────────────────────────────────── def show_conviction_panel(conviction: dict): - """ - White-background conviction panel with Z-score gauge and per-ETF bars. - Uses separate st.markdown calls per ETF row to avoid Streamlit HTML escaping. - """ - label = conviction["label"] - z_score = conviction["z_score"] - best_name = conviction["best_name"] + label = conviction["label"] + z_score = conviction["z_score"] + best_name = conviction["best_name"] sorted_pairs = conviction["sorted_pairs"] + color = conviction_color(label) + icon = conviction_icon(label) - color = conviction_color(label) - icon = conviction_icon(label) - - z_clipped = max(-3.0, min(3.0, z_score)) - bar_pct = int((z_clipped + 3) / 6 * 100) - - max_score = max(s for _, s in sorted_pairs) if sorted_pairs else 1.0 + z_clipped = max(-3.0, min(3.0, z_score)) + bar_pct = int((z_clipped + 3) / 6 * 100) + max_score = max((s for _, s in sorted_pairs), default=1.0) if max_score <= 0: max_score = 1.0 - # ── Header + gauge ──────────────────────────────────────────────────────── st.markdown(f"""
- -
+
{icon} Signal Conviction - + Z = {z_score:.2f} σ + font-weight:700; padding:4px 16px; border-radius:20px; font-size:13px;"> {label}
-
- Weak −3σ - Neutral 0σ - Strong +3σ + Weak −3σNeutral 0σStrong +3σ
-
-
+
+
+ background:linear-gradient(90deg,#fab1a0,{color}); border-radius:8px;">
-
Model probability by ETF (ranked high → low):
""", unsafe_allow_html=True) - # ── Per-ETF rows ────────────────────────────────────────────────────────── for i, (name, score) in enumerate(sorted_pairs): - is_winner = (name == best_name) - is_last = (i == len(sorted_pairs) - 1) - bar_w = int(score / max_score * 100) + is_winner = (name == best_name) + is_last = (i == len(sorted_pairs) - 1) + bar_w = int(score / max_score * 100) name_style = "font-weight:700; color:#00897b;" if is_winner else "color:#444;" bar_color = color if is_winner else "#b2dfdb" if score > max_score * 0.5 else "#e0e0e0" star = " β˜…" if is_winner else "" @@ -134,10 +146,9 @@ def show_conviction_panel(conviction: dict): box-shadow:0 2px 8px rgba(0,0,0,0.07);">
{name}{star} -
-
+
+
{score:.4f}
@@ -145,7 +156,7 @@ def show_conviction_panel(conviction: dict): """, unsafe_allow_html=True) st.caption( - "Z-score = std deviations the top ETF's probability sits above the mean of all ETF probabilities. " + "Z-score = std deviations the top ETF's probability sits above the mean. " "Higher β†’ model is more decisive." ) @@ -153,60 +164,44 @@ def show_conviction_panel(conviction: dict): # ── Metrics row ─────────────────────────────────────────────────────────────── def show_metrics_row(result: dict, tbill_rate: float): - """Five-column metric display.""" col1, col2, col3, col4, col5 = st.columns(5) - - col1.metric( - "πŸ“ˆ Annualised Return", - f"{result['ann_return']*100:.2f}%", - delta=f"vs T-bill: {(result['ann_return'] - tbill_rate)*100:.2f}%", - ) - col2.metric( - "πŸ“Š Sharpe Ratio", - f"{result['sharpe']:.2f}", - delta="Risk-Adjusted" if result['sharpe'] > 1 else "Below Threshold", - ) - col3.metric( - "🎯 Hit Ratio (15d)", - f"{result['hit_ratio']*100:.0f}%", - delta="Strong" if result['hit_ratio'] > 0.6 else "Weak", - ) - col4.metric( - "πŸ“‰ Max Drawdown", - f"{result['max_dd']*100:.2f}%", - delta="Peak to Trough", - ) - col5.metric( - "⚠️ Max Daily DD", - f"{result['max_daily_dd']*100:.2f}%", - delta="Worst Day", - ) + col1.metric("πŸ“ˆ Ann. 
Return", f"{result['ann_return']*100:.2f}%", + delta=f"vs T-bill: {(result['ann_return'] - tbill_rate)*100:.2f}%") + col2.metric("πŸ“Š Sharpe", f"{result['sharpe']:.2f}", + delta="Strong" if result['sharpe'] > 1 else "Weak") + col3.metric("🎯 Hit Ratio 15d", f"{result['hit_ratio']*100:.0f}%", + delta="Good" if result['hit_ratio'] > 0.55 else "Weak") + col4.metric("πŸ“‰ Max Drawdown", f"{result['max_dd']*100:.2f}%", + delta="Peak to Trough") + col5.metric("⚠️ Max Daily DD", f"{result['max_daily_dd']*100:.2f}%", + delta="Worst Day") # ── Comparison table ────────────────────────────────────────────────────────── def show_comparison_table(comparison_df: pd.DataFrame): - """Styled comparison table for all three approaches.""" def highlight_winner(row): if "WINNER" in str(row.get("Winner", "")): return ["background-color: rgba(0,200,150,0.15); font-weight:bold"] * len(row) return [""] * len(row) - styled = comparison_df.style.apply(highlight_winner, axis=1).set_properties(**{ - "text-align": "center", - "font-size": "14px", - }).set_table_styles([ - {"selector": "th", "props": [("font-size", "14px"), ("font-weight", "bold"), - ("text-align", "center")]}, - {"selector": "td", "props": [("padding", "10px")]}, - ]) + styled = ( + comparison_df.style + .apply(highlight_winner, axis=1) + .set_properties(**{"text-align": "center", "font-size": "14px"}) + .set_table_styles([ + {"selector": "th", "props": [("font-size", "14px"), + ("font-weight", "bold"), + ("text-align", "center")]}, + {"selector": "td", "props": [("padding", "10px")]}, + ]) + ) st.dataframe(styled, use_container_width=True) # ── Audit trail ─────────────────────────────────────────────────────────────── def show_audit_trail(audit_trail: list): - """Last 20 days styled audit trail.""" if not audit_trail: st.info("No audit trail data available.") return @@ -214,59 +209,19 @@ def show_audit_trail(audit_trail: list): df = pd.DataFrame(audit_trail).tail(20)[["Date", "Signal", "Net_Return"]] def color_return(val): - 
return "color: #00c896; font-weight:bold" if val > 0 else "color: #ff4b4b; font-weight:bold" - - styled = df.style.applymap(color_return, subset=["Net_Return"]).format( - {"Net_Return": "{:.2%}"} - ).set_properties(**{ - "font-size": "16px", - "text-align": "center", - }).set_table_styles([ - {"selector": "th", "props": [("font-size", "16px"), ("font-weight", "bold"), - ("text-align", "center")]}, - {"selector": "td", "props": [("padding", "10px")]}, - ]) + return ("color: #00c896; font-weight:bold" if val > 0 + else "color: #ff4b4b; font-weight:bold") + + styled = ( + df.style + .map(color_return, subset=["Net_Return"]) + .format({"Net_Return": "{:.2%}"}) + .set_properties(**{"font-size": "14px", "text-align": "center"}) + .set_table_styles([ + {"selector": "th", "props": [("font-size", "14px"), + ("font-weight", "bold"), + ("text-align", "center")]}, + {"selector": "td", "props": [("padding", "10px")]}, + ]) + ) st.dataframe(styled, use_container_width=True, height=500) - - -# ── All models' next day signals panel ─────────────────────────────────────── - -def show_all_signals_panel(all_signals: dict, target_etfs: list, include_cash: bool, next_date): - """ - Compact panel showing what each model predicts for next trading day, - with top probability displayed. - """ - APPROACH_COLORS = { - "Approach 1": "#00ffc8", - "Approach 2": "#7c6aff", - "Approach 3": "#ff6b6b", - } - - st.subheader(f"πŸ—“οΈ All Models β€” {next_date.strftime('%Y-%m-%d')} Signals") - - cols = st.columns(len(all_signals)) - for col, (name, info) in zip(cols, all_signals.items()): - color = APPROACH_COLORS.get(name, "#888888") - signal = info["signal"] - proba = info["proba"] - top_prob = float(np.max(proba)) * 100 - is_winner = info["is_winner"] - border = f"3px solid {color}" - badge = " ⭐ WINNER" if is_winner else "" - - col.markdown(f""" -
-
- {name.upper()}{badge} -
-
- {signal} -
-
- Top prob: {top_prob:.1f}% -
-
- """, unsafe_allow_html=True) diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/ui/charts.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/ui/charts.py index fc3b824bf2f3cbedd7b956d6969fad5e5ecca980..3965d4d7256f2fd352e4ec58bb1b30bfb2c3fb5c 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/ui/charts.py +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/ui/charts.py @@ -1,22 +1,15 @@ """ ui/charts.py -All Plotly chart builders for the Streamlit UI. +Plotly chart builders. +Equity curve: winner + SPY + AGG only. Y-axis as % growth (not raw multiplier). """ import numpy as np import pandas as pd import plotly.graph_objects as go - -APPROACH_COLOURS = { - "Approach 1": "#00ffc8", - "Approach 2": "#7c6aff", - "Approach 3": "#ff6b6b", -} -BENCHMARK_COLOURS = { - "SPY": "#ff4b4b", - "AGG": "#ffa500", -} +WINNER_COLOUR = "#00ffc8" +BENCHMARK_COLOURS = {"SPY": "#ff4b4b", "AGG": "#ffa500"} def equity_curve_chart( @@ -28,117 +21,72 @@ def equity_curve_chart( tbill_rate: float, ) -> go.Figure: """ - Equity curve chart showing all three approaches + SPY + AGG benchmarks. 
- - Args: - results : {approach_name: result_dict} - winner_name : highlighted approach - plot_dates : DatetimeIndex for x-axis - df : full DataFrame (for benchmark columns) - test_slice : slice object to extract test-period benchmark returns - tbill_rate : for benchmark metric calculation + Equity curve: winner strategy vs SPY and AGG. + Y-axis shows % growth (cum_return - 1) * 100 for readability. """ from strategy.backtest import compute_benchmark_metrics fig = go.Figure() - # ── Strategy lines ──────────────────────────────────────────────────────── - for name, res in results.items(): - if res is None: - continue - colour = APPROACH_COLOURS.get(name, "#aaaaaa") - width = 3 if name == winner_name else 1.5 - dash = "solid" if name == winner_name else "dot" - - n = min(len(res["cum_returns"]), len(plot_dates)) - + # ── Winner strategy ─────────────────────────────────────────────────────── + winner_res = results.get(winner_name) + if winner_res is not None: + cum = winner_res["cum_returns"] + n = min(len(cum), len(plot_dates)) fig.add_trace(go.Scatter( x=plot_dates[:n], - y=res["cum_returns"][:n], + y=(cum[:n] - 1) * 100, mode="lines", - name=f"{name} {'β˜…' if name == winner_name else ''}", - line=dict(color=colour, width=width, dash=dash), - fill="tozeroy" if name == winner_name else None, - fillcolor=f"rgba({_hex_to_rgb(colour)},0.07)" if name == winner_name else None, + name=f"{winner_name} β˜…", + line=dict(color=WINNER_COLOUR, width=2.5), + fill="tozeroy", + fillcolor="rgba(0,255,200,0.07)", )) - # ── Benchmark: SPY ──────────────────────────────────────────────────────── + # ── SPY benchmark ───────────────────────────────────────────────────────── if "SPY_Ret" in df.columns: - spy_rets = df["SPY_Ret"].iloc[test_slice].values + spy_rets = df["SPY_Ret"].iloc[test_slice].values.copy() + spy_rets = np.clip(spy_rets, -0.5, 0.5) # sanity clip + spy_rets = spy_rets[~np.isnan(spy_rets)] n = min(len(spy_rets), len(plot_dates)) spy_m = 
compute_benchmark_metrics(spy_rets[:n], tbill_rate) fig.add_trace(go.Scatter( x=plot_dates[:n], - y=spy_m["cum_returns"], + y=(spy_m["cum_returns"] - 1) * 100, mode="lines", - name="SPY (Equity BM)", + name="SPY", line=dict(color=BENCHMARK_COLOURS["SPY"], width=1.5, dash="dot"), )) - # ── Benchmark: AGG ──────────────────────────────────────────────────────── + # ── AGG benchmark ───────────────────────────────────────────────────────── if "AGG_Ret" in df.columns: - agg_rets = df["AGG_Ret"].iloc[test_slice].values + agg_rets = df["AGG_Ret"].iloc[test_slice].values.copy() + agg_rets = np.clip(agg_rets, -0.5, 0.5) + agg_rets = agg_rets[~np.isnan(agg_rets)] n = min(len(agg_rets), len(plot_dates)) agg_m = compute_benchmark_metrics(agg_rets[:n], tbill_rate) fig.add_trace(go.Scatter( x=plot_dates[:n], - y=agg_m["cum_returns"], + y=(agg_m["cum_returns"] - 1) * 100, mode="lines", - name="AGG (Bond BM)", + name="AGG", line=dict(color=BENCHMARK_COLOURS["AGG"], width=1.5, dash="dot"), )) fig.update_layout( template="plotly_dark", - height=460, + height=420, hovermode="x unified", - showlegend=True, legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01, font=dict(size=11)), xaxis_title="Date", - yaxis_title="Cumulative Return (Γ—)", + yaxis_title="Cumulative Return (%)", margin=dict(l=50, r=30, t=20, b=50), + yaxis=dict(ticksuffix="%"), ) return fig -def comparison_bar_chart(results: dict, winner_name: str) -> go.Figure: - """ - Horizontal bar chart comparing annualised returns across all three approaches. 
- """ - names = [] - returns = [] - colours = [] - - for name, res in results.items(): - if res is None: - continue - names.append(name) - returns.append(res["ann_return"] * 100) - colours.append(APPROACH_COLOURS.get(name, "#aaaaaa")) - - fig = go.Figure(go.Bar( - x=returns, - y=names, - orientation="h", - marker_color=colours, - text=[f"{r:.1f}%" for r in returns], - textposition="auto", - )) - - fig.update_layout( - template="plotly_dark", - height=200, - xaxis_title="Annualised Return (%)", - margin=dict(l=100, r=30, t=10, b=40), - showlegend=False, - ) - return fig - - -# ── Helper ──────────────────────────────────────────────────────────────────── - def _hex_to_rgb(hex_color: str) -> str: - """Convert #rrggbb to 'r,g,b' string for rgba().""" h = hex_color.lstrip("#") r, g, b = int(h[0:2], 16), int(h[2:4], 16), int(h[4:6], 16) return f"{r},{g},{b}" diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/base.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/base.py index b8f6d0445d0bdd6c7d364b1c63d52cc7c37f8143..b020ba27880c8a58a313304eab320de88361dd57 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/base.py +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/base.py @@ -1,28 +1,53 @@ """ models/base.py -Shared utilities for all three CNN-LSTM variants. -Key fix: class_weight support to prevent majority-class collapse. +Shared utilities for all CNN-LSTM variants. 
+Optimised for CPU training on HF Spaces. """ import numpy as np -import pandas as pd +import hashlib +import pickle +import os +from pathlib import Path from sklearn.preprocessing import RobustScaler from sklearn.utils.class_weight import compute_class_weight -import tensorflow as tf -from tensorflow import keras -SEED = 42 -tf.random.set_seed(SEED) +SEED = 42 +CACHE_DIR = Path("/tmp/p2_model_cache") +CACHE_DIR.mkdir(exist_ok=True) + np.random.seed(SEED) +# ── Cache helpers ───────────────────────────────────────────────────────────── + +def make_cache_key(last_date: str, start_yr: int, fee_bps: int, + epochs: int, split: str, include_cash: bool, + lookback: int) -> str: + raw = f"{last_date}_{start_yr}_{fee_bps}_{epochs}_{split}_{include_cash}_{lookback}" + return hashlib.md5(raw.encode()).hexdigest() + + +def save_cache(key: str, payload: dict): + path = CACHE_DIR / f"{key}.pkl" + with open(path, "wb") as f: + pickle.dump(payload, f) + + +def load_cache(key: str) -> dict | None: + path = CACHE_DIR / f"{key}.pkl" + if path.exists(): + try: + with open(path, "rb") as f: + return pickle.load(f) + except Exception: + path.unlink(missing_ok=True) + return None + + # ── Sequence builder ────────────────────────────────────────────────────────── def build_sequences(features: np.ndarray, targets: np.ndarray, lookback: int): - """ - Build supervised sequences for CNN-LSTM input. 
- X[i] = features[i : i+lookback] β†’ predicts y[i+lookback] - """ X, y = [], [] for i in range(lookback, len(features)): X.append(features[i - lookback: i]) @@ -36,35 +61,25 @@ def train_val_test_split(X, y, train_pct=0.70, val_pct=0.15): n = len(X) t1 = int(n * train_pct) t2 = int(n * (train_pct + val_pct)) - return ( - X[:t1], y[:t1], - X[t1:t2], y[t1:t2], - X[t2:], y[t2:], - ) + return X[:t1], y[:t1], X[t1:t2], y[t1:t2], X[t2:], y[t2:] # ── Feature scaling ─────────────────────────────────────────────────────────── def scale_features(X_train, X_val, X_test): - n_feat = X_train.shape[2] - scaler = RobustScaler() + n_feat = X_train.shape[2] + scaler = RobustScaler() scaler.fit(X_train.reshape(-1, n_feat)) - def _t(X): s = X.shape return scaler.transform(X.reshape(-1, n_feat)).reshape(s) - return _t(X_train), _t(X_val), _t(X_test), scaler # ── Label builder ───────────────────────────────────────────────────────────── def returns_to_labels(y_raw, include_cash=True, cash_threshold=0.0): - """ - Assign label = argmax(returns). - If include_cash and best return < cash_threshold β†’ label = n_etfs (CASH). - """ - best = np.argmax(y_raw, axis=1) + best = np.argmax(y_raw, axis=1) if include_cash: best_ret = y_raw[np.arange(len(y_raw)), best] cash_idx = y_raw.shape[1] @@ -77,35 +92,22 @@ def returns_to_labels(y_raw, include_cash=True, cash_threshold=0.0): # ── Class weights ───────────────────────────────────────────────────────────── def compute_class_weights(y_labels: np.ndarray, n_classes: int) -> dict: - """ - Compute balanced class weights to counteract majority-class collapse. - Returns dict {class_index: weight} for use in model.fit(). 
-    """
-    classes = np.arange(n_classes)
     present = np.unique(y_labels)
     try:
-        weights = compute_class_weight(
-            class_weight="balanced",
-            classes=present,
-            y=y_labels,
-        )
+        weights = compute_class_weight("balanced", classes=present, y=y_labels)
         weight_dict = {int(c): float(w) for c, w in zip(present, weights)}
     except Exception:
         weight_dict = {}
-
-    # Fill any missing classes with weight 1.0
-    for c in classes:
+    for c in range(n_classes):
         if c not in weight_dict:
             weight_dict[c] = 1.0
-
     return weight_dict


# ── Callbacks ───────────────────────────────────────────────────────────────

-def get_callbacks(patience_es=20, patience_lr=10, min_lr=1e-6):
-    """Longer patience to allow models time to learn past majority class."""
+def get_callbacks(patience_es=15, patience_lr=8, min_lr=1e-6):
+    from tensorflow import keras
     return [
         keras.callbacks.EarlyStopping(
             monitor="val_loss",
─────────────────────────────────────────────────────────────────── - -def evaluate_returns( - preds, proba, y_raw_test, target_etfs, tbill_rate, fee_bps, include_cash=True, +def find_best_lookback( + X_raw: np.ndarray, + y_raw: np.ndarray, + y_labels_fn, + train_pct: float, + val_pct: float, + n_classes: int, + include_cash: bool, + candidates: list = None, ): - n_etfs = len(target_etfs) - daily_tbill = tbill_rate / 252 - strat_rets = [] - - for i, cls in enumerate(preds): - if include_cash and cls == n_etfs: - net = daily_tbill - fee_bps / 10000 - else: - cls = min(int(cls), n_etfs - 1) - net = float(y_raw_test[i][cls]) - fee_bps / 10000 - strat_rets.append(net) - - strat_rets = np.array(strat_rets) - cum_returns = np.cumprod(1 + strat_rets) - ann_return = cum_returns[-1] ** (252 / len(strat_rets)) - 1 - - last_proba = proba[-1] - next_cls = int(np.argmax(last_proba)) - next_etf = ( - "CASH" if (include_cash and next_cls == n_etfs) - else target_etfs[min(next_cls, n_etfs - 1)].replace("_Ret", "") - ) - - return strat_rets, ann_return, cum_returns, last_proba, next_etf + """ + Train a fast lightweight CNN on each lookback candidate using val loss. + Returns best lookback int. + Uses only Approach 1 architecture (fastest) to pick the winner. 
+ """ + from tensorflow import keras + + if candidates is None: + candidates = [30, 45, 60] + + best_lb = candidates[0] + best_loss = np.inf + + for lb in candidates: + try: + X_seq, y_seq = build_sequences(X_raw, y_raw, lb) + y_lab = y_labels_fn(y_seq) + + X_tr, y_tr, X_v, y_v, _, _ = train_val_test_split(X_seq, y_lab, train_pct, val_pct) + X_tr_s, X_v_s, _, _ = scale_features(X_tr, X_v, X_v) + + cw = compute_class_weights(y_tr, n_classes) + + # Tiny fast model just for lookback selection + inp = keras.Input(shape=X_tr_s.shape[1:]) + x = keras.layers.Conv1D(16, min(3, lb), padding="causal", activation="relu")(inp) + x = keras.layers.GlobalAveragePooling1D()(x) + out = keras.layers.Dense(n_classes, activation="softmax")(x) + m = keras.Model(inp, out) + m.compile(optimizer="adam", loss="sparse_categorical_crossentropy") + + hist = m.fit( + X_tr_s, y_tr, + validation_data=(X_v_s, y_v), + epochs=15, + batch_size=64, + class_weight=cw, + callbacks=[keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True)], + verbose=0, + ) + val_loss = min(hist.history.get("val_loss", [np.inf])) + if val_loss < best_loss: + best_loss = val_loss + best_lb = lb + + del m + except Exception: + continue + + return best_lb diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach1_wavelet.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach1_wavelet.py index 64a88a2b63192fbb52940b88495eff46f2e54006..38a7312286d3fa90a640f71ed9e8a70cf050d138 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach1_wavelet.py +++ 
b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach1_wavelet.py @@ -1,14 +1,21 @@ """ models/approach1_wavelet.py Approach 1: Wavelet Decomposition CNN-LSTM -With class weights to prevent majority-class collapse. +- Dynamic wavelet level based on sequence length (no boundary warnings) +- CPU-optimised smaller architecture +- Class weights to prevent majority-class collapse """ import numpy as np import pywt WAVELET = "db4" -LEVEL = 3 + + +def _safe_wavelet_level(lookback: int, wavelet: str = WAVELET) -> int: + """Compute max safe wavelet level for the given sequence length.""" + max_level = pywt.dwt_max_level(lookback, wavelet) + return min(2, max_level) # cap at 2 to avoid boundary effects def _wavelet_decompose_signal(signal: np.ndarray, wavelet: str, level: int) -> np.ndarray: @@ -16,13 +23,14 @@ def _wavelet_decompose_signal(signal: np.ndarray, wavelet: str, level: int) -> n coeffs = pywt.wavedec(signal, wavelet, level=level) bands = [] for c in coeffs: - band = np.interp(np.linspace(0, len(c)-1, T), np.arange(len(c)), c) + band = np.interp(np.linspace(0, len(c) - 1, T), np.arange(len(c)), c) bands.append(band) return np.stack(bands, axis=-1) -def apply_wavelet_transform(X: np.ndarray, wavelet=WAVELET, level=LEVEL) -> np.ndarray: +def apply_wavelet_transform(X: np.ndarray, wavelet: str = WAVELET) -> np.ndarray: n_samples, lookback, n_features = X.shape + level = _safe_wavelet_level(lookback, wavelet) n_bands = level + 1 X_wt = np.zeros((n_samples, lookback, n_features * n_bands), dtype=np.float32) for s in range(n_samples): @@ -33,18 +41,18 @@ def apply_wavelet_transform(X: np.ndarray, wavelet=WAVELET, level=LEVEL) -> np.n return X_wt -def build_wavelet_cnn_lstm(input_shape, n_classes, dropout=0.3, lstm_units=128): +def build_wavelet_cnn_lstm(input_shape, n_classes, dropout=0.3, lstm_units=64): from tensorflow import keras 
from models.base import classification_head inputs = keras.Input(shape=input_shape) - x = keras.layers.Conv1D(64, 3, padding="causal", activation="relu")(inputs) + x = keras.layers.Conv1D(32, 3, padding="causal", activation="relu")(inputs) x = keras.layers.BatchNormalization()(x) x = keras.layers.MaxPooling1D(2)(x) - x = keras.layers.Conv1D(32, 3, padding="causal", activation="relu")(x) + x = keras.layers.Conv1D(16, 3, padding="causal", activation="relu")(x) x = keras.layers.BatchNormalization()(x) x = keras.layers.Dropout(dropout)(x) - x = keras.layers.LSTM(lstm_units, dropout=dropout, recurrent_dropout=0.1)(x) + x = keras.layers.LSTM(lstm_units, dropout=dropout)(x) outputs = classification_head(x, n_classes, dropout) model = keras.Model(inputs, outputs, name="Approach1_Wavelet") @@ -58,7 +66,7 @@ def build_wavelet_cnn_lstm(input_shape, n_classes, dropout=0.3, lstm_units=128): def train_approach1( X_train, y_train, X_val, y_val, - n_classes, epochs=100, batch_size=32, dropout=0.3, lstm_units=128, + n_classes, epochs=80, batch_size=64, dropout=0.3, lstm_units=64, ): from models.base import get_callbacks, compute_class_weights diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach2_regime.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach2_regime.py index 876b6d1f75c7b37a76b8464349043a3236df07be..f16c38ae40ed06d65e920cd6726fb92b5eeb96e9 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach2_regime.py +++ 
b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach2_regime.py @@ -1,29 +1,18 @@ """ models/approach2_regime.py Approach 2: Regime-Conditioned CNN-LSTM - -Pipeline: - Raw macro signals - -> CNN Tower (64 filters, k=3) -> feature vector - -> Regime Classifier (HMM on VIX + HY spread + T10Y2Y) -> one-hot [4] - -> Concatenate CNN features + regime embedding - -> LSTM (128 units) - -> Dense 64 -> Softmax (n_etfs + 1 CASH) - -NOTE: tensorflow and hmmlearn are imported lazily inside functions -to prevent module-level import failures from making this module -appear broken to Python's import system. +- Fixed HMM convergence settings +- CPU-optimised smaller architecture +- Lazy imports to prevent module-level failures +- Class weights to prevent majority-class collapse """ import numpy as np -N_REGIMES = 4 -REGIME_HINTS = ["VIX", "HY", "Spread", "T10Y2Y", "T10Y3M", "Credit"] - +N_REGIMES = 3 # reduced from 4 to improve HMM convergence +REGIME_HINTS = ["VIX", "HY", "Spread", "T10Y2Y", "T10Y3M", "Credit", + "IG_SPREAD", "HY_SPREAD"] -# --------------------------------------------------------------------------- -# Regime detection helpers -# --------------------------------------------------------------------------- def _get_regime_cols(feature_names: list) -> list: return [ @@ -34,19 +23,12 @@ def _get_regime_cols(feature_names: list) -> list: def fit_regime_model(X_flat: np.ndarray, feature_names: list, n_regimes: int = N_REGIMES): - """ - Fit a Gaussian HMM on regime-relevant macro features. - Returns (hmm_model, regime_cols_idx). - hmm_model is None if hmmlearn is unavailable or fitting fails. 
- """ regime_col_names = _get_regime_cols(feature_names) if not regime_col_names: regime_col_names = feature_names[:min(3, len(feature_names))] - regime_cols_idx = [ - feature_names.index(c) for c in regime_col_names - if c in feature_names - ] + regime_cols_idx = [feature_names.index(c) for c in regime_col_names + if c in feature_names] X_regime = X_flat[:, regime_cols_idx] try: @@ -54,70 +36,55 @@ def fit_regime_model(X_flat: np.ndarray, feature_names: list, hmm = GaussianHMM( n_components=n_regimes, covariance_type="diag", - n_iter=100, + n_iter=50, # reduced from 100 + tol=1e-2, # looser tolerance β€” avoids non-convergence warning random_state=42, ) hmm.fit(X_regime) return hmm, regime_cols_idx except Exception as e: - print(f"[Approach 2] HMM fitting failed: {e}. Using fallback.") + print(f"[Approach 2] HMM failed: {e}. Using quantile fallback.") return None, regime_cols_idx def predict_regimes(hmm_model, X_flat: np.ndarray, regime_cols_idx: list, n_regimes: int = N_REGIMES) -> np.ndarray: - """Predict integer regime label for each day.""" X_regime = X_flat[:, regime_cols_idx] - if hmm_model is not None: try: return hmm_model.predict(X_regime) except Exception: pass - - # Fallback: quantile binning on first regime feature feat = X_regime[:, 0] quantiles = np.percentile(feat, np.linspace(0, 100, n_regimes + 1)) return np.digitize(feat, quantiles[1:-1]).astype(int) -def regimes_to_onehot(regimes: np.ndarray, - n_regimes: int = N_REGIMES) -> np.ndarray: +def regimes_to_onehot(regimes: np.ndarray, n_regimes: int = N_REGIMES) -> np.ndarray: one_hot = np.zeros((len(regimes), n_regimes), dtype=np.float32) for i, r in enumerate(regimes): one_hot[i, min(int(r), n_regimes - 1)] = 1.0 return one_hot -def build_regime_sequences(X_seq: np.ndarray, - regimes_flat: np.ndarray, +def build_regime_sequences(X_seq: np.ndarray, regimes_flat: np.ndarray, lookback: int) -> np.ndarray: n_samples = X_seq.shape[0] aligned = regimes_flat[lookback: lookback + n_samples] return 
regimes_to_onehot(aligned) -# --------------------------------------------------------------------------- -# Model builder -# --------------------------------------------------------------------------- - -def build_regime_cnn_lstm(seq_input_shape: tuple, - n_classes: int, - n_regimes: int = N_REGIMES, - dropout: float = 0.3, - lstm_units: int = 128): - """Build and compile the regime-conditioned CNN-LSTM model.""" +def build_regime_cnn_lstm(seq_input_shape, n_classes, + n_regimes=N_REGIMES, dropout=0.3, lstm_units=64): from tensorflow import keras from models.base import classification_head seq_input = keras.Input(shape=seq_input_shape, name="seq_input") - x = keras.layers.Conv1D(64, kernel_size=3, padding="causal", - activation="relu")(seq_input) + x = keras.layers.Conv1D(32, 3, padding="causal", activation="relu")(seq_input) x = keras.layers.BatchNormalization()(x) - x = keras.layers.MaxPooling1D(pool_size=2)(x) - x = keras.layers.Conv1D(32, kernel_size=3, padding="causal", - activation="relu")(x) + x = keras.layers.MaxPooling1D(2)(x) + x = keras.layers.Conv1D(16, 3, padding="causal", activation="relu")(x) x = keras.layers.BatchNormalization()(x) x = keras.layers.Dropout(dropout)(x) cnn_out = keras.layers.GlobalAveragePooling1D()(x) @@ -128,59 +95,35 @@ def build_regime_cnn_lstm(seq_input_shape: tuple, merged = keras.layers.Concatenate()([cnn_out, regime_emb]) x = keras.layers.Reshape((1, merged.shape[-1]))(merged) x = keras.layers.LSTM(lstm_units, dropout=dropout)(x) - outputs = classification_head(x, n_classes, dropout) - model = keras.Model( - inputs=[seq_input, regime_input], - outputs=outputs, - name="Approach2_Regime_CNN_LSTM", - ) + model = keras.Model(inputs=[seq_input, regime_input], outputs=outputs, + name="Approach2_Regime") model.compile( - optimizer=keras.optimizers.Adam(learning_rate=1e-3), + optimizer=keras.optimizers.Adam(1e-3), loss="sparse_categorical_crossentropy", metrics=["accuracy"], ) return model -# 
--------------------------------------------------------------------------- -# Training pipeline -# --------------------------------------------------------------------------- - def train_approach2( - X_train, y_train, - X_val, y_val, - X_flat_all: np.ndarray, - feature_names: list, - lookback: int, - train_size: int, - val_size: int, - n_classes: int, - epochs: int = 100, - batch_size: int = 32, - dropout: float = 0.3, - lstm_units: int = 128, + X_train, y_train, X_val, y_val, + X_flat_all, feature_names, lookback, + train_size, val_size, n_classes, + epochs=80, batch_size=64, dropout=0.3, lstm_units=64, ): - """ - Fit HMM regime model then train the regime-conditioned CNN-LSTM. - Returns: model, history, hmm_model, regime_cols_idx - """ from models.base import get_callbacks, compute_class_weights X_flat_train = X_flat_all[:train_size + lookback] hmm_model, regime_cols_idx = fit_regime_model(X_flat_train, feature_names) - regimes_all = predict_regimes(hmm_model, X_flat_all, regime_cols_idx) R_train = build_regime_sequences(X_train, regimes_all, lookback) R_val = build_regime_sequences(X_val, regimes_all, lookback + train_size) - model = build_regime_cnn_lstm( - X_train.shape[1:], n_classes, - dropout=dropout, lstm_units=lstm_units, - ) - + model = build_regime_cnn_lstm(X_train.shape[1:], n_classes, + dropout=dropout, lstm_units=lstm_units) cw = compute_class_weights(y_train, n_classes) history = model.fit( @@ -192,29 +135,13 @@ def train_approach2( callbacks=get_callbacks(), verbose=0, ) - return model, history, hmm_model, regime_cols_idx -# --------------------------------------------------------------------------- -# Inference -# --------------------------------------------------------------------------- - -def predict_approach2( - model, - X_test: np.ndarray, - X_flat_all: np.ndarray, - regime_cols_idx: list, - hmm_model, - lookback: int, - train_size: int, - val_size: int, -) -> tuple: - """Predict on test set with regime conditioning. 
Returns (preds, proba).""" +def predict_approach2(model, X_test, X_flat_all, regime_cols_idx, + hmm_model, lookback, train_size, val_size) -> tuple: regimes_all = predict_regimes(hmm_model, X_flat_all, regime_cols_idx) offset = lookback + train_size + val_size R_test = build_regime_sequences(X_test, regimes_all, offset) - - proba = model.predict([X_test, R_test], verbose=0) - preds = np.argmax(proba, axis=1) - return preds, proba + proba = model.predict([X_test, R_test], verbose=0) + return np.argmax(proba, axis=1), proba diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach3_multiscale.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach3_multiscale.py index 09014aae1de9efef3f00618cfdac27addc7aed4c..d1983181b046f8bb05c7757a65deab72f18e43fc 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach3_multiscale.py +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach3_multiscale.py @@ -1,18 +1,21 @@ """ models/approach3_multiscale.py Approach 3: Multi-Scale Parallel CNN-LSTM -With class weights to prevent majority-class collapse. 
+- CPU-optimised smaller architecture +- Class weights to prevent majority-class collapse +- Lazy imports to prevent module-level failures """ import numpy as np KERNEL_SIZES = [3, 7, 21] -FILTERS_EACH = 32 +FILTERS_EACH = 16 # reduced from 32 for CPU speed def build_multiscale_cnn_lstm( - input_shape, n_classes, kernel_sizes=None, - filters=FILTERS_EACH, dropout=0.3, lstm_units=128, + input_shape, n_classes, + kernel_sizes=None, filters=FILTERS_EACH, + dropout=0.3, lstm_units=64, ): from tensorflow import keras from models.base import classification_head @@ -22,19 +25,15 @@ def build_multiscale_cnn_lstm( inputs = keras.Input(shape=input_shape, name="multiscale_input") towers = [] - for k in kernel_sizes: t = keras.layers.Conv1D(filters, k, padding="causal", activation="relu", name=f"conv1_k{k}")(inputs) t = keras.layers.BatchNormalization(name=f"bn1_k{k}")(t) - t = keras.layers.Conv1D(filters, k, padding="causal", activation="relu", - name=f"conv2_k{k}")(t) - t = keras.layers.BatchNormalization(name=f"bn2_k{k}")(t) t = keras.layers.Dropout(dropout, name=f"drop_k{k}")(t) towers.append(t) - merged = keras.layers.Concatenate(axis=-1)(towers) if len(towers) > 1 else towers[0] - x = keras.layers.LSTM(lstm_units, dropout=dropout, recurrent_dropout=0.1)(merged) + merged = keras.layers.Concatenate(axis=-1)(towers) if len(towers) > 1 else towers[0] + x = keras.layers.LSTM(lstm_units, dropout=dropout)(merged) outputs = classification_head(x, n_classes, dropout) model = keras.Model(inputs, outputs, name="Approach3_MultiScale") @@ -48,8 +47,8 @@ def build_multiscale_cnn_lstm( def train_approach3( X_train, y_train, X_val, y_val, - n_classes, epochs=100, batch_size=32, - dropout=0.3, lstm_units=128, kernel_sizes=None, + n_classes, epochs=80, batch_size=64, + dropout=0.3, lstm_units=64, kernel_sizes=None, ): from models.base import get_callbacks, compute_class_weights @@ -59,7 +58,8 @@ def train_approach3( lookback = X_train.shape[1] valid_kernels = [k for k in kernel_sizes if k 
<= lookback] or [min(3, lookback)] model = build_multiscale_cnn_lstm( - X_train.shape[1:], n_classes, valid_kernels, dropout=dropout, lstm_units=lstm_units, + X_train.shape[1:], n_classes, valid_kernels, + dropout=dropout, lstm_units=lstm_units, ) cw = compute_class_weights(y_train, n_classes) diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/base.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/base.py index b020ba27880c8a58a313304eab320de88361dd57..9d6c137e9000054c5531972594c2d085e74782b0 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/base.py +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/base.py @@ -1,200 +1,171 @@ """ -models/base.py -Shared utilities for all CNN-LSTM variants. -Optimised for CPU training on HF Spaces. +strategy/backtest.py +Strategy execution, performance metrics, and benchmark calculations. 
+ +CASH logic (drawdown risk overlay β€” not a model class): + ENTER : 2-day cumulative return <= -15% + EXIT : model conviction Z-score >= 1.0 (model decisively picks an ETF again) """ import numpy as np -import hashlib -import pickle -import os -from pathlib import Path -from sklearn.preprocessing import RobustScaler -from sklearn.utils.class_weight import compute_class_weight - -SEED = 42 -CACHE_DIR = Path("/tmp/p2_model_cache") -CACHE_DIR.mkdir(exist_ok=True) - -np.random.seed(SEED) - - -# ── Cache helpers ───────────────────────────────────────────────────────────── - -def make_cache_key(last_date: str, start_yr: int, fee_bps: int, - epochs: int, split: str, include_cash: bool, - lookback: int) -> str: - raw = f"{last_date}_{start_yr}_{fee_bps}_{epochs}_{split}_{include_cash}_{lookback}" - return hashlib.md5(raw.encode()).hexdigest() - - -def save_cache(key: str, payload: dict): - path = CACHE_DIR / f"{key}.pkl" - with open(path, "wb") as f: - pickle.dump(payload, f) - - -def load_cache(key: str) -> dict | None: - path = CACHE_DIR / f"{key}.pkl" - if path.exists(): - try: - with open(path, "rb") as f: - return pickle.load(f) - except Exception: - path.unlink(missing_ok=True) - return None - - -# ── Sequence builder ────────────────────────────────────────────────────────── - -def build_sequences(features: np.ndarray, targets: np.ndarray, lookback: int): - X, y = [], [] - for i in range(lookback, len(features)): - X.append(features[i - lookback: i]) - y.append(targets[i]) - return np.array(X, dtype=np.float32), np.array(y, dtype=np.float32) - - -# ── Train / val / test split ────────────────────────────────────────────────── - -def train_val_test_split(X, y, train_pct=0.70, val_pct=0.15): - n = len(X) - t1 = int(n * train_pct) - t2 = int(n * (train_pct + val_pct)) - return X[:t1], y[:t1], X[t1:t2], y[t1:t2], X[t2:], y[t2:] - - -# ── Feature scaling ─────────────────────────────────────────────────────────── - -def scale_features(X_train, X_val, X_test): - 
n_feat = X_train.shape[2] - scaler = RobustScaler() - scaler.fit(X_train.reshape(-1, n_feat)) - def _t(X): - s = X.shape - return scaler.transform(X.reshape(-1, n_feat)).reshape(s) - return _t(X_train), _t(X_val), _t(X_test), scaler - - -# ── Label builder ───────────────────────────────────────────────────────────── - -def returns_to_labels(y_raw, include_cash=True, cash_threshold=0.0): - best = np.argmax(y_raw, axis=1) - if include_cash: - best_ret = y_raw[np.arange(len(y_raw)), best] - cash_idx = y_raw.shape[1] - labels = np.where(best_ret < cash_threshold, cash_idx, best) - else: - labels = best - return labels.astype(np.int32) - - -# ── Class weights ───────────────────────────────────────────────────────────── - -def compute_class_weights(y_labels: np.ndarray, n_classes: int) -> dict: - present = np.unique(y_labels) - try: - weights = compute_class_weight("balanced", classes=present, y=y_labels) - weight_dict = {int(c): float(w) for c, w in zip(present, weights)} - except Exception: - weight_dict = {} - for c in range(n_classes): - if c not in weight_dict: - weight_dict[c] = 1.0 - return weight_dict - - -# ── Callbacks ───────────────────────────────────────────────────────────────── - -def get_callbacks(patience_es=15, patience_lr=8, min_lr=1e-6): - from tensorflow import keras - return [ - keras.callbacks.EarlyStopping( - monitor="val_loss", - patience=patience_es, - restore_best_weights=True, - verbose=0, - ), - keras.callbacks.ReduceLROnPlateau( - monitor="val_loss", - factor=0.5, - patience=patience_lr, - min_lr=min_lr, - verbose=0, - ), - ] - - -# ── Lightweight output head (CPU-optimised) ─────────────────────────────────── - -def classification_head(x, n_classes: int, dropout: float = 0.3): - """Smaller head than original β€” faster on CPU, less overfitting risk.""" - from tensorflow import keras - x = keras.layers.Dense(32, activation="relu")(x) - x = keras.layers.Dropout(dropout)(x) - x = keras.layers.Dense(n_classes, activation="softmax")(x) - 
return x - - -# ── Auto lookback selection ─────────────────────────────────────────────────── - -def find_best_lookback( - X_raw: np.ndarray, - y_raw: np.ndarray, - y_labels_fn, - train_pct: float, - val_pct: float, - n_classes: int, - include_cash: bool, - candidates: list = None, -): - """ - Train a fast lightweight CNN on each lookback candidate using val loss. - Returns best lookback int. - Uses only Approach 1 architecture (fastest) to pick the winner. - """ - from tensorflow import keras - - if candidates is None: - candidates = [30, 45, 60] - - best_lb = candidates[0] - best_loss = np.inf - - for lb in candidates: - try: - X_seq, y_seq = build_sequences(X_raw, y_raw, lb) - y_lab = y_labels_fn(y_seq) - - X_tr, y_tr, X_v, y_v, _, _ = train_val_test_split(X_seq, y_lab, train_pct, val_pct) - X_tr_s, X_v_s, _, _ = scale_features(X_tr, X_v, X_v) - - cw = compute_class_weights(y_tr, n_classes) - - # Tiny fast model just for lookback selection - inp = keras.Input(shape=X_tr_s.shape[1:]) - x = keras.layers.Conv1D(16, min(3, lb), padding="causal", activation="relu")(inp) - x = keras.layers.GlobalAveragePooling1D()(x) - out = keras.layers.Dense(n_classes, activation="softmax")(x) - m = keras.Model(inp, out) - m.compile(optimizer="adam", loss="sparse_categorical_crossentropy") - - hist = m.fit( - X_tr_s, y_tr, - validation_data=(X_v_s, y_v), - epochs=15, - batch_size=64, - class_weight=cw, - callbacks=[keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True)], - verbose=0, - ) - val_loss = min(hist.history.get("val_loss", [np.inf])) - if val_loss < best_loss: - best_loss = val_loss - best_lb = lb - - del m - except Exception: +import pandas as pd +from datetime import datetime + +CASH_DRAWDOWN_TRIGGER = -0.15 # 2-day cumulative return threshold +CASH_EXIT_Z = 1.0 # Z-score required to exit CASH + + +def _zscore(proba: np.ndarray) -> float: + std = np.std(proba) + return float((np.max(proba) - np.mean(proba)) / std) if std > 1e-9 else 0.0 + + +def 
execute_strategy( + preds: np.ndarray, + proba: np.ndarray, + y_raw_test: np.ndarray, + test_dates: pd.DatetimeIndex, + target_etfs: list, + fee_bps: int, + tbill_rate: float, + include_cash: bool = True, # kept for API compat but CASH is now overlay-only +) -> dict: + n_etfs = len(target_etfs) + daily_tbill = tbill_rate / 252 + fee = fee_bps / 10000 + today = datetime.now().date() + + strat_rets = [] + audit_trail = [] + date_index = [] + + in_cash = False + recent_rets = [] # rolling 2-day window + + for i, cls in enumerate(preds): + cls = min(int(cls), n_etfs - 1) + etf_name = target_etfs[cls].replace("_Ret", "") + etf_ret = float(np.clip(y_raw_test[i][cls], -0.5, 0.5)) + z = _zscore(proba[i]) + + # ── 2-day drawdown check ────────────────────────────────────────────── + recent_rets.append(etf_ret) + if len(recent_rets) > 2: + recent_rets.pop(0) + two_day = ((1 + recent_rets[0]) * (1 + recent_rets[-1]) - 1 + if len(recent_rets) >= 2 else 0.0) + + if two_day <= CASH_DRAWDOWN_TRIGGER: + in_cash = True + if in_cash and z >= CASH_EXIT_Z: + in_cash = False + + # ── Execute ─────────────────────────────────────────────────────────── + if in_cash: + signal_etf = "CASH" + realized_ret = daily_tbill + else: + signal_etf = etf_name + realized_ret = etf_ret + + net_ret = realized_ret - fee + strat_rets.append(net_ret) + date_index.append(test_dates[i]) + + if test_dates[i].date() < today: + audit_trail.append({ + "Date": test_dates[i].strftime("%Y-%m-%d"), + "Signal": signal_etf, + "Net_Return": net_ret, + "Z_Score": round(z, 2), + }) + + strat_rets = np.array(strat_rets, dtype=np.float64) + + # Next signal + last_cls = min(int(preds[-1]), n_etfs - 1) + last_z = _zscore(proba[-1]) + last_ret = float(np.clip(y_raw_test[-1][last_cls], -0.5, 0.5)) + prev_ret = float(np.clip(y_raw_test[-2][last_cls], -0.5, 0.5)) if len(y_raw_test) > 1 else 0.0 + last_2d = (1 + prev_ret) * (1 + last_ret) - 1 + next_cash = last_2d <= CASH_DRAWDOWN_TRIGGER and last_z < CASH_EXIT_Z + next_signal = 
"CASH" if next_cash else target_etfs[last_cls].replace("_Ret", "") + + metrics = _compute_metrics(strat_rets, tbill_rate, date_index) + + return { + **metrics, + "strat_rets": strat_rets, + "audit_trail": audit_trail, + "next_signal": next_signal, + "next_proba": proba[-1], + } + + +def _compute_metrics(strat_rets: np.ndarray, tbill_rate: float, + date_index: list = None) -> dict: + if len(strat_rets) == 0: + return {} + + cum_returns = np.cumprod(1 + strat_rets) + n = len(strat_rets) + ann_return = float(cum_returns[-1] ** (252 / n) - 1) + + excess = strat_rets - tbill_rate / 252 + sharpe = float(np.mean(excess) / (np.std(strat_rets) + 1e-9) * np.sqrt(252)) + + hit_ratio = float(np.mean(strat_rets[-15:] > 0)) + + cum_max = np.maximum.accumulate(cum_returns) + drawdown = (cum_returns - cum_max) / cum_max + max_dd = float(np.min(drawdown)) + + worst_idx = int(np.argmin(strat_rets)) + max_daily = float(strat_rets[worst_idx]) + worst_date = (date_index[worst_idx].strftime("%Y-%m-%d") + if date_index and worst_idx < len(date_index) else "N/A") + + return { + "cum_returns": cum_returns, + "ann_return": ann_return, + "sharpe": sharpe, + "hit_ratio": hit_ratio, + "max_dd": max_dd, + "max_daily_dd": max_daily, + "max_daily_date": worst_date, + "cum_max": cum_max, + } + + +def compute_benchmark_metrics(returns: np.ndarray, tbill_rate: float) -> dict: + return _compute_metrics(np.array(returns, dtype=np.float64), tbill_rate) + + +def select_winner(results: dict) -> str: + best_name, best_ret = None, -np.inf + for name, res in results.items(): + if res is None: continue - - return best_lb + r = res.get("ann_return", -np.inf) + if r > best_ret: + best_ret, best_name = r, name + return best_name + + +def build_comparison_table(results: dict, winner_name: str) -> pd.DataFrame: + rows = [] + for name, res in results.items(): + if res is None: + rows.append({"Approach": name, "Ann. 
Return": "N/A", + "Sharpe": "N/A", "Hit Ratio (15d)": "N/A", + "Max Drawdown": "N/A", "Winner": ""}) + continue + rows.append({ + "Approach": name, + "Ann. Return": f"{res['ann_return']*100:.2f}%", + "Sharpe": f"{res['sharpe']:.2f}", + "Hit Ratio (15d)": f"{res['hit_ratio']*100:.0f}%", + "Max Drawdown": f"{res['max_dd']*100:.2f}%", + "Winner": "⭐ WINNER" if name == winner_name else "", + }) + return pd.DataFrame(rows) diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/ui/components.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/ui/components.py index 48afd9682daacbe9ce9d72e86fd2c0cc5c8292cf..5f062a870f25bb5e93ede2fbc2d52de6aff150c0 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/ui/components.py +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/ui/components.py @@ -1,9 +1,11 @@ """ ui/components.py Reusable Streamlit UI blocks. 
-- Fixed applymap β†’ map deprecation -- Removed debug expanders -- Added show_all_signals_panel +Changes: +- Metrics row: Ann Return compared vs SPY (not T-bill) +- Max Daily DD: shows date it happened +- Conviction panel: compact single-line ETF list (no big bars) +- applymap β†’ map (deprecation fix) """ import streamlit as st @@ -25,8 +27,10 @@ def show_freshness_status(freshness: dict): def show_signal_banner(next_signal: str, next_date, approach_name: str): is_cash = next_signal == "CASH" - bg = ("linear-gradient(135deg, #2d3436 0%, #1a1a2e 100%)" if is_cash - else "linear-gradient(135deg, #00d1b2 0%, #00a896 100%)") + bg = ("linear-gradient(135deg, #2d3436 0%, #1a1a2e 100%)" if is_cash + else "linear-gradient(135deg, #00d1b2 0%, #00a896 100%)") + label = ("⚠️ DRAWDOWN PROTECTION ACTIVE β€” CASH" + if is_cash else f"🎯 {next_date.strftime('%Y-%m-%d')} β†’ {next_signal}") st.markdown(f"""
@@ -34,9 +38,9 @@ def show_signal_banner(next_signal: str, next_date, approach_name: str): letter-spacing:3px; margin-bottom:6px;"> {approach_name.upper()} Β· NEXT TRADING DAY SIGNAL
-

- 🎯 {next_date.strftime('%Y-%m-%d')} β†’ {next_signal} +

+ {label}

""", unsafe_allow_html=True) @@ -46,34 +50,25 @@ def show_signal_banner(next_signal: str, next_date, approach_name: str): def show_all_signals_panel(all_signals: dict, target_etfs: list, include_cash: bool, next_date, optimal_lookback: int): - APPROACH_COLORS = { - "Approach 1": "#00ffc8", - "Approach 2": "#7c6aff", - "Approach 3": "#ff6b6b", - } + COLORS = {"Approach 1": "#00ffc8", "Approach 2": "#7c6aff", "Approach 3": "#ff6b6b"} st.subheader(f"πŸ—“οΈ All Models β€” {next_date.strftime('%Y-%m-%d')} Signals") - st.caption(f"πŸ“ Optimal lookback: **{optimal_lookback}d** (auto-selected from 30/45/60)") + st.caption(f"πŸ“ Lookback **{optimal_lookback}d** found optimal (auto-selected from 30 / 45 / 60d)") cols = st.columns(len(all_signals)) for col, (name, info) in zip(cols, all_signals.items()): - color = APPROACH_COLORS.get(name, "#888888") - signal = info["signal"] - proba = info["proba"] - top_prob = float(np.max(proba)) * 100 - is_winner = info["is_winner"] - badge = " ⭐" if is_winner else "" + color = COLORS.get(name, "#888") + signal = info["signal"] + top_prob = float(np.max(info["proba"])) * 100 + badge = " ⭐" if info["is_winner"] else "" + sig_col = "#aaa" if signal == "CASH" else "white" col.markdown(f"""
- {name.upper()}{badge} -
-
- {signal} -
+ letter-spacing:2px; margin-bottom:6px;">{name.upper()}{badge}
+
{signal}
Confidence: {top_prob:.1f}%
@@ -93,14 +88,12 @@ def show_conviction_panel(conviction: dict): z_clipped = max(-3.0, min(3.0, z_score)) bar_pct = int((z_clipped + 3) / 6 * 100) - max_score = max((s for _, s in sorted_pairs), default=1.0) - if max_score <= 0: - max_score = 1.0 + # ── Header with Z-score gauge ───────────────────────────────────────────── st.markdown(f"""
{icon} @@ -118,76 +111,73 @@ def show_conviction_panel(conviction: dict): font-size:11px; color:#999; margin-bottom:4px;"> Weak −3σNeutral 0σStrong +3σ
-
+
-
- Model probability by ETF (ranked high → low): +
+ MODEL PROBABILITY BY ETF +
+
+ {"".join([ + f'' + f'{"β˜… " if n == best_name else ""}{n} {s:.3f}' + for n, s in sorted_pairs + ])}
""", unsafe_allow_html=True) - for i, (name, score) in enumerate(sorted_pairs): - is_winner = (name == best_name) - is_last = (i == len(sorted_pairs) - 1) - bar_w = int(score / max_score * 100) - name_style = "font-weight:700; color:#00897b;" if is_winner else "color:#444;" - bar_color = color if is_winner else "#b2dfdb" if score > max_score * 0.5 else "#e0e0e0" - star = " β˜…" if is_winner else "" - bottom_r = "0 0 12px 12px" if is_last else "0" - border_bot = "border-bottom:1px solid #f0f0f0;" if not is_last else "" - - st.markdown(f""" -
-
- {name}{star} -
-
-
- {score:.4f} -
-
- """, unsafe_allow_html=True) - st.caption( - "Z-score = std deviations the top ETF's probability sits above the mean. " - "Higher β†’ model is more decisive." + "Z-score = std deviations the top ETF's probability sits above the mean of all ETF probabilities. " + "Higher β†’ model is more decisive. " + "⚠️ CASH override triggers if 2-day cumulative return ≀ βˆ’15%, exits when Z β‰₯ 1.0." ) # ── Metrics row ─────────────────────────────────────────────────────────────── -def show_metrics_row(result: dict, tbill_rate: float): - col1, col2, col3, col4, col5 = st.columns(5) - col1.metric("πŸ“ˆ Ann. Return", f"{result['ann_return']*100:.2f}%", - delta=f"vs T-bill: {(result['ann_return'] - tbill_rate)*100:.2f}%") - col2.metric("πŸ“Š Sharpe", f"{result['sharpe']:.2f}", - delta="Strong" if result['sharpe'] > 1 else "Weak") - col3.metric("🎯 Hit Ratio 15d", f"{result['hit_ratio']*100:.0f}%", - delta="Good" if result['hit_ratio'] > 0.55 else "Weak") - col4.metric("πŸ“‰ Max Drawdown", f"{result['max_dd']*100:.2f}%", - delta="Peak to Trough") - col5.metric("⚠️ Max Daily DD", f"{result['max_daily_dd']*100:.2f}%", - delta="Worst Day") +def show_metrics_row(result: dict, tbill_rate: float, spy_ann_return: float = None): + c1, c2, c3, c4, c5 = st.columns(5) + + # Ann return vs SPY + if spy_ann_return is not None: + diff = (result['ann_return'] - spy_ann_return) * 100 + sign = "+" if diff >= 0 else "" + delta_str = f"vs SPY: {sign}{diff:.2f}%" + else: + delta_str = f"vs T-bill: {(result['ann_return'] - tbill_rate)*100:.2f}%" + + c1.metric("πŸ“ˆ Ann. 
Return", f"{result['ann_return']*100:.2f}%", delta=delta_str) + c2.metric("πŸ“Š Sharpe", f"{result['sharpe']:.2f}", + delta="Strong" if result['sharpe'] > 1 else "Weak") + c3.metric("🎯 Hit Ratio 15d", f"{result['hit_ratio']*100:.0f}%", + delta="Good" if result['hit_ratio'] > 0.55 else "Weak") + c4.metric("πŸ“‰ Max Drawdown", f"{result['max_dd']*100:.2f}%", + delta="Peak to Trough") + + # Max daily DD with date + worst_date = result.get("max_daily_date", "N/A") + c5.metric("⚠️ Max Daily DD", f"{result['max_daily_dd']*100:.2f}%", + delta=f"on {worst_date}") # ── Comparison table ────────────────────────────────────────────────────────── def show_comparison_table(comparison_df: pd.DataFrame): - def highlight_winner(row): + def _highlight(row): if "WINNER" in str(row.get("Winner", "")): return ["background-color: rgba(0,200,150,0.15); font-weight:bold"] * len(row) return [""] * len(row) styled = ( comparison_df.style - .apply(highlight_winner, axis=1) + .apply(_highlight, axis=1) .set_properties(**{"text-align": "center", "font-size": "14px"}) .set_table_styles([ {"selector": "th", "props": [("font-size", "14px"), @@ -206,16 +196,22 @@ def show_audit_trail(audit_trail: list): st.info("No audit trail data available.") return - df = pd.DataFrame(audit_trail).tail(20)[["Date", "Signal", "Net_Return"]] + df = pd.DataFrame(audit_trail).tail(20) + cols = [c for c in ["Date", "Signal", "Net_Return", "Z_Score"] if c in df.columns] + df = df[cols] - def color_return(val): + def _color_ret(val): return ("color: #00c896; font-weight:bold" if val > 0 else "color: #ff4b4b; font-weight:bold") + fmt = {"Net_Return": "{:.2%}"} + if "Z_Score" in df.columns: + fmt["Z_Score"] = "{:.2f}" + styled = ( df.style - .map(color_return, subset=["Net_Return"]) - .format({"Net_Return": "{:.2%}"}) + .map(_color_ret, subset=["Net_Return"]) + .format(fmt) .set_properties(**{"font-size": "14px", "text-align": "center"}) .set_table_styles([ {"selector": "th", "props": [("font-size", "14px"), diff --git 
a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/ui/charts.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/ui/charts.py index 3965d4d7256f2fd352e4ec58bb1b30bfb2c3fb5c..13e3961f6d6ae4013bbb4a26da3202927580491d 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/ui/charts.py +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/ui/charts.py @@ -1,14 +1,15 @@ """ ui/charts.py -Plotly chart builders. -Equity curve: winner + SPY + AGG only. Y-axis as % growth (not raw multiplier). +Equity curve: winner vs SPY and AGG only. +Y-axis: % cumulative growth from 0 (not raw multiplier). +SPY/AGG returns are verified as pct returns (clipped) before compounding. """ import numpy as np import pandas as pd import plotly.graph_objects as go -WINNER_COLOUR = "#00ffc8" +WINNER_COLOUR = "#00ffc8" BENCHMARK_COLOURS = {"SPY": "#ff4b4b", "AGG": "#ffa500"} @@ -20,10 +21,6 @@ def equity_curve_chart( test_slice: slice, tbill_rate: float, ) -> go.Figure: - """ - Equity curve: winner strategy vs SPY and AGG. - Y-axis shows % growth (cum_return - 1) * 100 for readability. 
- """ from strategy.backtest import compute_benchmark_metrics fig = go.Figure() @@ -32,46 +29,55 @@ def equity_curve_chart( winner_res = results.get(winner_name) if winner_res is not None: cum = winner_res["cum_returns"] - n = min(len(cum), len(plot_dates)) - fig.add_trace(go.Scatter( - x=plot_dates[:n], - y=(cum[:n] - 1) * 100, - mode="lines", - name=f"{winner_name} β˜…", - line=dict(color=WINNER_COLOUR, width=2.5), - fill="tozeroy", - fillcolor="rgba(0,255,200,0.07)", - )) + # Sanity: if cum[-1] > 10x (1000%), something is wrong β€” skip render + if cum[-1] < 10: + n = min(len(cum), len(plot_dates)) + pct = (cum[:n] - 1) * 100 + fig.add_trace(go.Scatter( + x=plot_dates[:n], y=pct, + mode="lines", + name=f"{winner_name} β˜…", + line=dict(color=WINNER_COLOUR, width=2.5), + fill="tozeroy", + fillcolor="rgba(0,255,200,0.07)", + )) - # ── SPY benchmark ───────────────────────────────────────────────────────── + # ── SPY ─────────────────────────────────────────────────────────────────── + spy_ann = None if "SPY_Ret" in df.columns: - spy_rets = df["SPY_Ret"].iloc[test_slice].values.copy() - spy_rets = np.clip(spy_rets, -0.5, 0.5) # sanity clip - spy_rets = spy_rets[~np.isnan(spy_rets)] - n = min(len(spy_rets), len(plot_dates)) - spy_m = compute_benchmark_metrics(spy_rets[:n], tbill_rate) - fig.add_trace(go.Scatter( - x=plot_dates[:n], - y=(spy_m["cum_returns"] - 1) * 100, - mode="lines", - name="SPY", - line=dict(color=BENCHMARK_COLOURS["SPY"], width=1.5, dash="dot"), - )) + raw = df["SPY_Ret"].iloc[test_slice].values.copy().astype(float) + raw = raw[~np.isnan(raw)] + # If values look like prices (median > 1), convert to returns + if len(raw) > 0 and np.median(np.abs(raw)) > 1: + raw = np.diff(raw) / raw[:-1] + raw = np.clip(raw, -0.5, 0.5) + if len(raw) > 0: + n = min(len(raw), len(plot_dates)) + spy_m = compute_benchmark_metrics(raw[:n], tbill_rate) + spy_ann = spy_m.get("ann_return") + fig.add_trace(go.Scatter( + x=plot_dates[:n], + y=(spy_m["cum_returns"] - 1) * 
100, + mode="lines", name="SPY", + line=dict(color=BENCHMARK_COLOURS["SPY"], width=1.5, dash="dot"), + )) - # ── AGG benchmark ───────────────────────────────────────────────────────── + # ── AGG ─────────────────────────────────────────────────────────────────── if "AGG_Ret" in df.columns: - agg_rets = df["AGG_Ret"].iloc[test_slice].values.copy() - agg_rets = np.clip(agg_rets, -0.5, 0.5) - agg_rets = agg_rets[~np.isnan(agg_rets)] - n = min(len(agg_rets), len(plot_dates)) - agg_m = compute_benchmark_metrics(agg_rets[:n], tbill_rate) - fig.add_trace(go.Scatter( - x=plot_dates[:n], - y=(agg_m["cum_returns"] - 1) * 100, - mode="lines", - name="AGG", - line=dict(color=BENCHMARK_COLOURS["AGG"], width=1.5, dash="dot"), - )) + raw = df["AGG_Ret"].iloc[test_slice].values.copy().astype(float) + raw = raw[~np.isnan(raw)] + if len(raw) > 0 and np.median(np.abs(raw)) > 1: + raw = np.diff(raw) / raw[:-1] + raw = np.clip(raw, -0.5, 0.5) + if len(raw) > 0: + n = min(len(raw), len(plot_dates)) + agg_m = compute_benchmark_metrics(raw[:n], tbill_rate) + fig.add_trace(go.Scatter( + x=plot_dates[:n], + y=(agg_m["cum_returns"] - 1) * 100, + mode="lines", name="AGG", + line=dict(color=BENCHMARK_COLOURS["AGG"], width=1.5, dash="dot"), + )) fig.update_layout( template="plotly_dark", @@ -80,13 +86,7 @@ def equity_curve_chart( legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01, font=dict(size=11)), xaxis_title="Date", yaxis_title="Cumulative Return (%)", - margin=dict(l=50, r=30, t=20, b=50), yaxis=dict(ticksuffix="%"), + margin=dict(l=50, r=30, t=20, b=50), ) - return fig - - -def _hex_to_rgb(hex_color: str) -> str: - h = hex_color.lstrip("#") - r, g, b = int(h[0:2], 16), int(h[2:4], 16), int(h[4:6], 16) - return f"{r},{g},{b}" + return fig, spy_ann diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/base.py 
b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/base.py index 9d6c137e9000054c5531972594c2d085e74782b0..36fe25e40e923799b2a57e008d8aba1d83178e3d 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/base.py +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/base.py @@ -1,171 +1,162 @@ """ -strategy/backtest.py -Strategy execution, performance metrics, and benchmark calculations. - -CASH logic (drawdown risk overlay β€” not a model class): - ENTER : 2-day cumulative return <= -15% - EXIT : model conviction Z-score >= 1.0 (model decisively picks an ETF again) +models/base.py +Shared utilities for all CNN-LSTM variants. +Optimised for CPU training on HF Spaces. """ import numpy as np -import pandas as pd -from datetime import datetime - -CASH_DRAWDOWN_TRIGGER = -0.15 # 2-day cumulative return threshold -CASH_EXIT_Z = 1.0 # Z-score required to exit CASH - - -def _zscore(proba: np.ndarray) -> float: - std = np.std(proba) - return float((np.max(proba) - np.mean(proba)) / std) if std > 1e-9 else 0.0 - - -def execute_strategy( - preds: np.ndarray, - proba: np.ndarray, - y_raw_test: np.ndarray, - test_dates: pd.DatetimeIndex, - target_etfs: list, - fee_bps: int, - tbill_rate: float, - include_cash: bool = True, # kept for API compat but CASH is now overlay-only -) -> dict: - n_etfs = len(target_etfs) - daily_tbill = tbill_rate / 252 - fee = fee_bps / 10000 - today = datetime.now().date() - - strat_rets = [] - audit_trail = [] - date_index = [] - - in_cash = False - recent_rets = [] # rolling 2-day window - - for i, cls in enumerate(preds): - cls = min(int(cls), n_etfs - 1) - etf_name = target_etfs[cls].replace("_Ret", "") - etf_ret = float(np.clip(y_raw_test[i][cls], -0.5, 0.5)) - z = _zscore(proba[i]) - - # ── 2-day drawdown check ────────────────────────────────────────────── - 
recent_rets.append(etf_ret) - if len(recent_rets) > 2: - recent_rets.pop(0) - two_day = ((1 + recent_rets[0]) * (1 + recent_rets[-1]) - 1 - if len(recent_rets) >= 2 else 0.0) - - if two_day <= CASH_DRAWDOWN_TRIGGER: - in_cash = True - if in_cash and z >= CASH_EXIT_Z: - in_cash = False - - # ── Execute ─────────────────────────────────────────────────────────── - if in_cash: - signal_etf = "CASH" - realized_ret = daily_tbill - else: - signal_etf = etf_name - realized_ret = etf_ret - - net_ret = realized_ret - fee - strat_rets.append(net_ret) - date_index.append(test_dates[i]) - - if test_dates[i].date() < today: - audit_trail.append({ - "Date": test_dates[i].strftime("%Y-%m-%d"), - "Signal": signal_etf, - "Net_Return": net_ret, - "Z_Score": round(z, 2), - }) - - strat_rets = np.array(strat_rets, dtype=np.float64) - - # Next signal - last_cls = min(int(preds[-1]), n_etfs - 1) - last_z = _zscore(proba[-1]) - last_ret = float(np.clip(y_raw_test[-1][last_cls], -0.5, 0.5)) - prev_ret = float(np.clip(y_raw_test[-2][last_cls], -0.5, 0.5)) if len(y_raw_test) > 1 else 0.0 - last_2d = (1 + prev_ret) * (1 + last_ret) - 1 - next_cash = last_2d <= CASH_DRAWDOWN_TRIGGER and last_z < CASH_EXIT_Z - next_signal = "CASH" if next_cash else target_etfs[last_cls].replace("_Ret", "") - - metrics = _compute_metrics(strat_rets, tbill_rate, date_index) - - return { - **metrics, - "strat_rets": strat_rets, - "audit_trail": audit_trail, - "next_signal": next_signal, - "next_proba": proba[-1], - } - - -def _compute_metrics(strat_rets: np.ndarray, tbill_rate: float, - date_index: list = None) -> dict: - if len(strat_rets) == 0: - return {} - - cum_returns = np.cumprod(1 + strat_rets) - n = len(strat_rets) - ann_return = float(cum_returns[-1] ** (252 / n) - 1) - - excess = strat_rets - tbill_rate / 252 - sharpe = float(np.mean(excess) / (np.std(strat_rets) + 1e-9) * np.sqrt(252)) - - hit_ratio = float(np.mean(strat_rets[-15:] > 0)) - - cum_max = np.maximum.accumulate(cum_returns) - drawdown = 
(cum_returns - cum_max) / cum_max - max_dd = float(np.min(drawdown)) - - worst_idx = int(np.argmin(strat_rets)) - max_daily = float(strat_rets[worst_idx]) - worst_date = (date_index[worst_idx].strftime("%Y-%m-%d") - if date_index and worst_idx < len(date_index) else "N/A") - - return { - "cum_returns": cum_returns, - "ann_return": ann_return, - "sharpe": sharpe, - "hit_ratio": hit_ratio, - "max_dd": max_dd, - "max_daily_dd": max_daily, - "max_daily_date": worst_date, - "cum_max": cum_max, - } - - -def compute_benchmark_metrics(returns: np.ndarray, tbill_rate: float) -> dict: - return _compute_metrics(np.array(returns, dtype=np.float64), tbill_rate) - - -def select_winner(results: dict) -> str: - best_name, best_ret = None, -np.inf - for name, res in results.items(): - if res is None: - continue - r = res.get("ann_return", -np.inf) - if r > best_ret: - best_ret, best_name = r, name - return best_name - - -def build_comparison_table(results: dict, winner_name: str) -> pd.DataFrame: - rows = [] - for name, res in results.items(): - if res is None: - rows.append({"Approach": name, "Ann. 
Return": "N/A", - "Sharpe": "N/A", "Hit Ratio (15d)": "N/A", - "Max Drawdown": "N/A", "Winner": ""}) +import hashlib +import pickle +from pathlib import Path +from sklearn.preprocessing import RobustScaler +from sklearn.utils.class_weight import compute_class_weight + +SEED = 42 +CACHE_DIR = Path("/tmp/p2_model_cache") +CACHE_DIR.mkdir(exist_ok=True) +np.random.seed(SEED) + + +# ── Cache helpers ───────────────────────────────────────────────────────────── + +def make_cache_key(last_date, start_yr, fee_bps, epochs, split, include_cash, lookback): + raw = f"{last_date}_{start_yr}_{fee_bps}_{epochs}_{split}_{include_cash}_{lookback}" + return hashlib.md5(raw.encode()).hexdigest() + + +def save_cache(key, payload): + with open(CACHE_DIR / f"{key}.pkl", "wb") as f: + pickle.dump(payload, f) + + +def load_cache(key): + path = CACHE_DIR / f"{key}.pkl" + if path.exists(): + try: + with open(path, "rb") as f: + return pickle.load(f) + except Exception: + path.unlink(missing_ok=True) + return None + + +# ── Sequence builder ────────────────────────────────────────────────────────── + +def build_sequences(features, targets, lookback): + X, y = [], [] + for i in range(lookback, len(features)): + X.append(features[i - lookback: i]) + y.append(targets[i]) + return np.array(X, dtype=np.float32), np.array(y, dtype=np.float32) + + +# ── Train / val / test split ────────────────────────────────────────────────── + +def train_val_test_split(X, y, train_pct=0.70, val_pct=0.15): + n = len(X) + t1 = int(n * train_pct) + t2 = int(n * (train_pct + val_pct)) + return X[:t1], y[:t1], X[t1:t2], y[t1:t2], X[t2:], y[t2:] + + +# ── Feature scaling ─────────────────────────────────────────────────────────── + +def scale_features(X_train, X_val, X_test): + n_feat = X_train.shape[2] + scaler = RobustScaler() + scaler.fit(X_train.reshape(-1, n_feat)) + def _t(X): + s = X.shape + return scaler.transform(X.reshape(-1, n_feat)).reshape(s) + return _t(X_train), _t(X_val), _t(X_test), scaler + + +# ── 
Label builder (no CASH class β€” CASH is a risk overlay) ─────────────────── + +def returns_to_labels(y_raw): + """Simple argmax β€” model always predicts one of the ETFs.""" + return np.argmax(y_raw, axis=1).astype(np.int32) + + +# ── Class weights ───────────────────────────────────────────────────────────── + +def compute_class_weights(y_labels, n_classes): + present = np.unique(y_labels) + try: + weights = compute_class_weight("balanced", classes=present, y=y_labels) + weight_dict = {int(c): float(w) for c, w in zip(present, weights)} + except Exception: + weight_dict = {} + for c in range(n_classes): + if c not in weight_dict: + weight_dict[c] = 1.0 + return weight_dict + + +# ── Callbacks ───────────────────────────────────────────────────────────────── + +def get_callbacks(patience_es=15, patience_lr=8, min_lr=1e-6): + from tensorflow import keras + return [ + keras.callbacks.EarlyStopping( + monitor="val_loss", patience=patience_es, + restore_best_weights=True, verbose=0, + ), + keras.callbacks.ReduceLROnPlateau( + monitor="val_loss", factor=0.5, + patience=patience_lr, min_lr=min_lr, verbose=0, + ), + ] + + +# ── Output head ─────────────────────────────────────────────────────────────── + +def classification_head(x, n_classes, dropout=0.3): + from tensorflow import keras + x = keras.layers.Dense(32, activation="relu")(x) + x = keras.layers.Dropout(dropout)(x) + x = keras.layers.Dense(n_classes, activation="softmax")(x) + return x + + +# ── Auto lookback selection (Approach 1 proxy, fast) ───────────────────────── + +def find_best_lookback(X_raw, y_raw, train_pct, val_pct, n_classes, + candidates=None): + from tensorflow import keras + + if candidates is None: + candidates = [30, 45, 60] + + best_lb, best_loss = candidates[0], np.inf + + for lb in candidates: + try: + X_seq, y_seq = build_sequences(X_raw, y_raw, lb) + y_lab = returns_to_labels(y_seq) + X_tr, y_tr, X_v, y_v, _, _ = train_val_test_split(X_seq, y_lab, train_pct, val_pct) + X_tr_s, X_v_s, _, _ 
= scale_features(X_tr, X_v, X_v) + cw = compute_class_weights(y_tr, n_classes) + + inp = keras.Input(shape=X_tr_s.shape[1:]) + x = keras.layers.Conv1D(16, min(3, lb), padding="causal", activation="relu")(inp) + x = keras.layers.GlobalAveragePooling1D()(x) + out = keras.layers.Dense(n_classes, activation="softmax")(x) + m = keras.Model(inp, out) + m.compile(optimizer="adam", loss="sparse_categorical_crossentropy") + + hist = m.fit( + X_tr_s, y_tr, + validation_data=(X_v_s, y_v), + epochs=15, batch_size=64, class_weight=cw, + callbacks=[keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True)], + verbose=0, + ) + val_loss = min(hist.history.get("val_loss", [np.inf])) + if val_loss < best_loss: + best_loss, best_lb = val_loss, lb + del m + except Exception: continue - rows.append({ - "Approach": name, - "Ann. Return": f"{res['ann_return']*100:.2f}%", - "Sharpe": f"{res['sharpe']:.2f}", - "Hit Ratio (15d)": f"{res['hit_ratio']*100:.0f}%", - "Max Drawdown": f"{res['max_dd']*100:.2f}%", - "Winner": "⭐ WINNER" if name == winner_name else "", - }) - return pd.DataFrame(rows) + + return best_lb diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/ui/components.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/ui/components.py index 5f062a870f25bb5e93ede2fbc2d52de6aff150c0..af700cba7a4ef3f6441a9647158f9d7115df6af0 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/ui/components.py +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/ui/components.py @@ -76,6 +76,25 @@ def show_all_signals_panel(all_signals: dict, target_etfs: list, """, unsafe_allow_html=True) +def _build_etf_badges(sorted_pairs: list, best_name: str, color: str) -> str: + """Build compact ETF probability badges as HTML string.""" + badges = [] + for name, score in sorted_pairs: + is_best = name == best_name + bg = "#e8fdf7" if is_best else 
"#f8f8f8" + border = color if is_best else "#ddd" + txt_col = color if is_best else "#555" + weight = "700" if is_best else "400" + star = "β˜… " if is_best else "" + badges.append( + f'' + f'{star}{name} {score:.3f}' + ) + return "".join(badges) + + # ── Signal conviction panel ─────────────────────────────────────────────────── def show_conviction_panel(conviction: dict): @@ -121,14 +140,7 @@ def show_conviction_panel(conviction: dict): MODEL PROBABILITY BY ETF
- {"".join([ - f'' - f'{"β˜… " if n == best_name else ""}{n} {s:.3f}' - for n, s in sorted_pairs - ])} + {_build_etf_badges(sorted_pairs, best_name, color)}
""", unsafe_allow_html=True) diff --git a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/base.py b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/base.py index 36fe25e40e923799b2a57e008d8aba1d83178e3d..17780796b4e1482fc78b2965712426d2e53ad3fc 100644 --- a/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/base.py +++ b/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/base.py @@ -122,7 +122,7 @@ def classification_head(x, n_classes, dropout=0.3): # ── Auto lookback selection (Approach 1 proxy, fast) ───────────────────────── def find_best_lookback(X_raw, y_raw, train_pct, val_pct, n_classes, - candidates=None): + include_cash=False, candidates=None): from tensorflow import keras if candidates is None: diff --git a/hf_space/hf_space/hf_space/hf_space/models/base.py b/hf_space/hf_space/hf_space/hf_space/models/base.py index 17780796b4e1482fc78b2965712426d2e53ad3fc..4fe679ed1a7ee751931f365fcacae60b546c593a 100644 --- a/hf_space/hf_space/hf_space/hf_space/models/base.py +++ b/hf_space/hf_space/hf_space/hf_space/models/base.py @@ -20,7 +20,7 @@ np.random.seed(SEED) # ── Cache helpers ───────────────────────────────────────────────────────────── def make_cache_key(last_date, start_yr, fee_bps, epochs, split, include_cash, lookback): - raw = f"{last_date}_{start_yr}_{fee_bps}_{epochs}_{split}_{include_cash}_{lookback}" + raw = f"v2_{last_date}_{start_yr}_{fee_bps}_{epochs}_{split}_{include_cash}_{lookback}" return hashlib.md5(raw.encode()).hexdigest() diff --git a/hf_space/hf_space/hf_space/models/base.py b/hf_space/hf_space/hf_space/models/base.py index 4fe679ed1a7ee751931f365fcacae60b546c593a..b178b8c7a5cc95607c2545447da03ceb360a6d3b 100644 --- a/hf_space/hf_space/hf_space/models/base.py +++ b/hf_space/hf_space/hf_space/models/base.py @@ -14,6 +14,23 @@ from sklearn.utils.class_weight import compute_class_weight SEED = 42 CACHE_DIR = Path("/tmp/p2_model_cache") 
CACHE_DIR.mkdir(exist_ok=True) + +# Clear any v1 cache files (missing max_daily_date field) +for _f in CACHE_DIR.glob("*.pkl"): + try: + import pickle as _pkl + with open(_f, "rb") as _fh: + _d = _pkl.load(_fh) + # If any result dict lacks max_daily_date, bust the whole cache + if isinstance(_d, dict) and "results" in _d: + _needs_bust = any( + isinstance(r, dict) and "max_daily_date" not in r + for r in _d["results"].values() if r is not None + ) + if _needs_bust: + _f.unlink(missing_ok=True) + except Exception: + _f.unlink(missing_ok=True) np.random.seed(SEED) diff --git a/hf_space/hf_space/ui/components.py b/hf_space/hf_space/ui/components.py index af700cba7a4ef3f6441a9647158f9d7115df6af0..c5dd92131d74943e3b1daaff0d2ada49f634fed2 100644 --- a/hf_space/hf_space/ui/components.py +++ b/hf_space/hf_space/ui/components.py @@ -173,10 +173,10 @@ def show_metrics_row(result: dict, tbill_rate: float, spy_ann_return: float = No c4.metric("πŸ“‰ Max Drawdown", f"{result['max_dd']*100:.2f}%", delta="Peak to Trough") - # Max daily DD with date + # Max daily DD with date (only show date if available) worst_date = result.get("max_daily_date", "N/A") - c5.metric("⚠️ Max Daily DD", f"{result['max_daily_dd']*100:.2f}%", - delta=f"on {worst_date}") + dd_delta = f"on {worst_date}" if worst_date != "N/A" else "Worst Single Day" + c5.metric("⚠️ Max Daily DD", f"{result['max_daily_dd']*100:.2f}%", delta=dd_delta) # ── Comparison table ──────────────────────────────────────────────────────────