Commit 1b670cd · 1 parent: a84f653 · committed by GitHub Actions

Sync from GitHub: 39a55a42ff2cf3284376a3ecac22623e752d9e78

Files changed (50)
  1. hf_space/hf_space/hf_space/hf_space/app.py +88 -49
  2. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py +31 -39
  3. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/data/loader.py +119 -88
  4. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py +51 -62
  5. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/data/loader.py +101 -108
  6. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py +68 -51
  7. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/data/loader.py +96 -76
  8. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/data/data/__init__.py +1 -0
  9. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md +12 -0
  10. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/data/__init__.py +1 -0
  11. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/data/loader.py +215 -0
  12. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md +110 -14
  13. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py +273 -0
  14. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitattributes +35 -0
  15. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/Dockerfile +20 -0
  16. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md +19 -0
  17. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/requirements.txt +3 -0
  18. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/src/streamlit_app.py +40 -0
  19. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/requirements.txt +29 -3
  20. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/base.py +199 -0
  21. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach1_wavelet.py +167 -0
  22. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach2_regime.py +1 -0
  23. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach3_multiscale.py +150 -0
  24. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/strategy/backtest.py +193 -0
  25. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/signals/conviction.py +93 -0
  26. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/ui/components.py +229 -0
  27. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/ui/charts.py +144 -0
  28. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/utils/calendar.py +91 -0
  29. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/__init__.py +1 -0
  30. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/signals/__init__.py +1 -0
  31. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/strategy/__init__.py +1 -0
  32. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/ui/__init__.py +1 -0
  33. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/utils/__init__.py +1 -0
  34. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/models/__init__.py +1 -0
  35. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/signals/__init__.py +1 -1
  36. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/strategy/__init__.py +1 -1
  37. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/signals/__init__.py +1 -1
  38. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/ui/__init__.py +1 -1
  39. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/utils/__init__.py +1 -1
  40. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach2_regime.py +216 -0
  41. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/base.py +67 -93
  42. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach1_wavelet.py +28 -109
  43. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach2_regime.py +4 -1
  44. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach3_multiscale.py +31 -101
  45. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/ui/components.py +43 -0
  46. hf_space/hf_space/hf_space/hf_space/hf_space/strategy/backtest.py +2 -0
  47. hf_space/hf_space/hf_space/ui/components.py +108 -153
  48. hf_space/hf_space/ui/charts.py +31 -83
  49. hf_space/models/base.py +108 -81
  50. models/approach1_wavelet.py +17 -9
hf_space/hf_space/hf_space/hf_space/app.py CHANGED
@@ -13,7 +13,9 @@ from data.loader import (load_dataset, check_data_freshness,
                          get_features_and_targets, dataset_summary)
 from utils.calendar import get_est_time, get_next_signal_date
 from models.base import (build_sequences, train_val_test_split,
-                         scale_features, returns_to_labels)
+                         scale_features, returns_to_labels,
+                         find_best_lookback, make_cache_key,
+                         save_cache, load_cache)
 from models.approach1_wavelet import train_approach1, predict_approach1
 from models.approach2_regime import train_approach2, predict_approach2
 from models.approach3_multiscale import train_approach3, predict_approach3
@@ -39,8 +41,7 @@ with st.sidebar:
 
     start_yr = st.slider("📅 Start Year", 2010, 2024, 2016)
     fee_bps = st.slider("💰 Fee (bps)", 0, 50, 10)
-    lookback = st.slider("📐 Lookback (days)", 20, 60, 30, step=5)
-    epochs = st.number_input("🔁 Max Epochs", 20, 300, 100, step=10)
+    epochs = st.number_input("🔁 Max Epochs", 20, 150, 80, step=10)
 
     st.divider()
     split_option = st.selectbox("📊 Train/Val/Test Split", ["70/15/15", "80/10/10"], index=0)
@@ -109,7 +110,7 @@ st.info(
     f"**T-bill:** {tbill_rate*100:.2f}%"
 )
 
-# ── Build sequences ───────────────────────────────────────────────────────────
+# ── Prepare raw arrays ────────────────────────────────────────────────────────
 X_raw = df[input_features].values.astype(np.float32)
 y_raw = df[target_etfs].values.astype(np.float32)
 
@@ -117,39 +118,74 @@ for j in range(X_raw.shape[1]):
     mask = np.isnan(X_raw[:, j])
     if mask.any():
         X_raw[mask, j] = np.nanmean(X_raw[:, j])
-
 for j in range(y_raw.shape[1]):
     mask = np.isnan(y_raw[:, j])
     if mask.any():
         y_raw[mask, j] = np.nanmean(y_raw[:, j])
 
-X_seq, y_seq = build_sequences(X_raw, y_raw, lookback)
-y_labels = returns_to_labels(y_seq, include_cash=include_cash)
-
-(X_train, y_train_r, X_val, y_val_r,
- X_test, y_test_r) = train_val_test_split(X_seq, y_seq, train_pct, val_pct)
-(_, y_train_l, _, y_val_l,
- _, _) = train_val_test_split(X_seq, y_labels, train_pct, val_pct)
-
-X_train_s, X_val_s, X_test_s, _ = scale_features(X_train, X_val, X_test)
-
-train_size = len(X_train)
-val_size = len(X_val)
-test_start = lookback + train_size + val_size
-test_dates = df.index[test_start: test_start + len(X_test)]
-test_slice = slice(test_start, test_start + len(X_test))
-
-st.success(f"✅ Sequences — Train: {train_size:,} · Val: {val_size:,} · Test: {len(X_test):,}")
-
-# ── Train all three approaches ────────────────────────────────────────────────
-results = {}
-trained_info = {}
-progress = st.progress(0, text="Starting training...")
-
-# Approach 1
-with st.spinner("🌊 Training Approach 1 — Wavelet CNN-LSTM..."):
+# ── Auto-select optimal lookback ──────────────────────────────────────────────
+last_date_str = str(freshness.get("last_date_in_data", "unknown"))
+
+# Check cache for lookback selection too
+lb_cache_key = make_cache_key(
+    last_date_str, start_yr, fee_bps, int(epochs), split_option, include_cash, 0
+)
+lb_cached = load_cache(f"lb_{lb_cache_key}")
+
+if lb_cached is not None:
+    optimal_lookback = lb_cached["optimal_lookback"]
+    st.success(f"⚡ Loaded from cache · Optimal lookback: **{optimal_lookback}d**")
+else:
+    with st.spinner("🔍 Finding optimal lookback (30 / 45 / 60d)..."):
+        def _y_labels_fn(y_seq):
+            return returns_to_labels(y_seq, include_cash=include_cash)
+        optimal_lookback = find_best_lookback(
+            X_raw, y_raw, _y_labels_fn,
+            train_pct, val_pct, n_classes, include_cash,
+            candidates=[30, 45, 60],
+        )
+    save_cache(f"lb_{lb_cache_key}", {"optimal_lookback": optimal_lookback})
+    st.success(f"📐 Optimal lookback: **{optimal_lookback}d** (auto-selected from 30/45/60)")
+
+lookback = optimal_lookback
+
+# ── Check full model cache ────────────────────────────────────────────────────
+cache_key = make_cache_key(last_date_str, start_yr, fee_bps, int(epochs),
+                           split_option, include_cash, lookback)
+cached_data = load_cache(cache_key)
+from_cache = cached_data is not None
+
+if from_cache:
+    results = cached_data["results"]
+    trained_info = cached_data["trained_info"]
+    test_dates = pd.DatetimeIndex(cached_data["test_dates"])
+    test_slice = cached_data["test_slice"]
+    st.success("⚡ Results loaded from cache — no retraining needed.")
+else:
+    # ── Build sequences ───────────────────────────────────────────────────────
+    X_seq, y_seq = build_sequences(X_raw, y_raw, lookback)
+    y_labels = returns_to_labels(y_seq, include_cash=include_cash)
+
+    (X_train, y_train_r, X_val, y_val_r,
+     X_test, y_test_r) = train_val_test_split(X_seq, y_seq, train_pct, val_pct)
+    (_, y_train_l, _, y_val_l,
+     _, _) = train_val_test_split(X_seq, y_labels, train_pct, val_pct)
+
+    X_train_s, X_val_s, X_test_s, _ = scale_features(X_train, X_val, X_test)
+
+    train_size = len(X_train)
+    val_size = len(X_val)
+    test_start = lookback + train_size + val_size
+    test_dates = df.index[test_start: test_start + len(X_test)]
+    test_slice = slice(test_start, test_start + len(X_test))
+
+    results = {}
+    trained_info = {}
+    progress = st.progress(0, text="Training Approach 1...")
+
+    # ── Approach 1 ────────────────────────────────────────────────────────────
     try:
-        model1, hist1, _ = train_approach1(
+        model1, _, _ = train_approach1(
             X_train_s, y_train_l, X_val_s, y_val_l,
             n_classes=n_classes, epochs=int(epochs),
         )
@@ -159,17 +195,15 @@ with st.spinner("🌊 Training Approach 1 — Wavelet CNN-LSTM..."):
             target_etfs, fee_bps, tbill_rate, include_cash,
         )
         trained_info["Approach 1"] = {"proba": proba1}
-        st.success("✅ Approach 1 complete")
     except Exception as e:
         st.warning(f"⚠️ Approach 1 failed: {e}")
         results["Approach 1"] = None
 
-progress.progress(33, text="Approach 1 done...")
+    progress.progress(33, text="Training Approach 2...")
 
-# Approach 2
-with st.spinner("🔀 Training Approach 2 — Regime-Conditioned CNN-LSTM..."):
+    # ── Approach 2 ────────────────────────────────────────────────────────────
     try:
-        model2, hist2, hmm2, regime_cols2 = train_approach2(
+        model2, _, hmm2, regime_cols2 = train_approach2(
             X_train_s, y_train_l, X_val_s, y_val_l,
             X_flat_all=X_raw, feature_names=input_features,
             lookback=lookback, train_size=train_size, val_size=val_size,
@@ -184,17 +218,15 @@ with st.spinner("🔀 Training Approach 2 — Regime-Conditioned CNN-LSTM..."):
             target_etfs, fee_bps, tbill_rate, include_cash,
         )
         trained_info["Approach 2"] = {"proba": proba2}
-        st.success("✅ Approach 2 complete")
     except Exception as e:
         st.warning(f"⚠️ Approach 2 failed: {e}")
         results["Approach 2"] = None
 
-progress.progress(66, text="Approach 2 done...")
+    progress.progress(66, text="Training Approach 3...")
 
-# Approach 3
-with st.spinner("📡 Training Approach 3 — Multi-Scale CNN-LSTM..."):
+    # ── Approach 3 ────────────────────────────────────────────────────────────
     try:
-        model3, hist3 = train_approach3(
+        model3, _ = train_approach3(
             X_train_s, y_train_l, X_val_s, y_val_l,
             n_classes=n_classes, epochs=int(epochs),
         )
@@ -204,13 +236,20 @@ with st.spinner("📡 Training Approach 3 — Multi-Scale CNN-LSTM..."):
             target_etfs, fee_bps, tbill_rate, include_cash,
         )
         trained_info["Approach 3"] = {"proba": proba3}
-        st.success("✅ Approach 3 complete")
     except Exception as e:
         st.warning(f"⚠️ Approach 3 failed: {e}")
         results["Approach 3"] = None
 
-progress.progress(100, text="All approaches complete!")
-progress.empty()
+    progress.progress(100, text="Done!")
+    progress.empty()
+
+    # ── Save to cache ─────────────────────────────────────────────────────────
+    save_cache(cache_key, {
+        "results": results,
+        "trained_info": trained_info,
+        "test_dates": list(test_dates),
+        "test_slice": test_slice,
+    })
 
 # ── Select winner ─────────────────────────────────────────────────────────────
 winner_name = select_winner(results)
@@ -226,14 +265,14 @@ st.divider()
 # ── Winner signal banner ──────────────────────────────────────────────────────
 show_signal_banner(winner_res["next_signal"], next_date, winner_name)
 
-# ── Conviction panel (winner only) ────────────────────────────────────────────
+# ── Conviction panel ──────────────────────────────────────────────────────────
 winner_proba = trained_info[winner_name]["proba"]
 conviction = compute_conviction(winner_proba[-1], target_etfs, include_cash)
 show_conviction_panel(conviction)
 
 st.divider()
 
-# ── All models' next day signals ──────────────────────────────────────────────
+# ── All models next day signals ───────────────────────────────────────────────
 all_signals = {
     name: {
         "signal": res["next_signal"],
@@ -242,7 +281,7 @@ all_signals = {
     }
     for name, res in results.items() if res is not None
 }
-show_all_signals_panel(all_signals, target_etfs, include_cash, next_date)
+show_all_signals_panel(all_signals, target_etfs, include_cash, next_date, optimal_lookback)
 
 st.divider()
 
@@ -259,13 +298,13 @@ show_comparison_table(comparison_df)
 
 st.divider()
 
-# ── Equity curves ─────────────────────────────────────────────────────────────
-st.subheader("📈 Out-of-Sample Equity Curves — All Approaches vs Benchmarks")
+# ── Equity curve ──────────────────────────────────────────────────────────────
+st.subheader(f"📈 {winner_name} vs SPY & AGG Out-of-Sample")
 fig = equity_curve_chart(results, winner_name, test_dates, df, test_slice, tbill_rate)
 st.plotly_chart(fig, use_container_width=True)
 
 st.divider()
 
-# ── Audit trail (winner) ──────────────────────────────────────────────────────
+# ── Audit trail ───────────────────────────────────────────────────────────────
 st.subheader(f"📋 Audit Trail — {winner_name} (Last 20 Trading Days)")
 show_audit_trail(winner_res["audit_trail"])
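Note for reviewers: the four new helpers imported from models.base (make_cache_key, save_cache, load_cache, find_best_lookback) are not part of this file's diff, so only their call sites are visible. A minimal sketch of what the cache trio plausibly looks like, inferred purely from those call sites — the CACHE_DIR location, the MD5 hashing, and pickle as the serializer are assumptions, not the committed implementation:

import hashlib
import pickle
from pathlib import Path

CACHE_DIR = Path("/tmp/model_cache")  # assumed location, not confirmed by the diff

def make_cache_key(*parts) -> str:
    # Hash the run parameters (last data date, start year, fee, epochs,
    # split, cash flag, lookback) into a stable, filename-safe key.
    blob = "|".join(str(p) for p in parts).encode()
    return hashlib.md5(blob).hexdigest()

def save_cache(key: str, payload: dict) -> None:
    # Persist an arbitrary payload dict under the key.
    CACHE_DIR.mkdir(parents=True, exist_ok=True)
    with open(CACHE_DIR / f"{key}.pkl", "wb") as f:
        pickle.dump(payload, f)

def load_cache(key: str):
    # Return the cached payload, or None on any miss or read error.
    path = CACHE_DIR / f"{key}.pkl"
    if not path.exists():
        return None
    try:
        with open(path, "rb") as f:
            return pickle.load(f)
    except Exception:
        return None

Because last_date_str is part of every key, the cache invalidates itself whenever the dataset gains a new trading day. find_best_lookback presumably trains or scores a cheap proxy model per candidate window (30/45/60) and returns the best scorer, but its body is likewise not shown in this commit.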
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py CHANGED
@@ -11,7 +11,7 @@ import numpy as np
 
 from data.loader import (load_dataset, check_data_freshness,
                          get_features_and_targets, dataset_summary)
-from utils.calendar import get_est_time, is_sync_window, get_next_signal_date
+from utils.calendar import get_est_time, get_next_signal_date
 from models.base import (build_sequences, train_val_test_split,
                          scale_features, returns_to_labels)
 from models.approach1_wavelet import train_approach1, predict_approach1
@@ -22,8 +22,9 @@ from signals.conviction import compute_conviction
 from ui.components import (
     show_freshness_status, show_signal_banner, show_conviction_panel,
     show_metrics_row, show_comparison_table, show_audit_trail,
+    show_all_signals_panel,
 )
-from ui.charts import equity_curve_chart, comparison_bar_chart
+from ui.charts import equity_curve_chart
 
 st.set_page_config(page_title="P2-ETF-CNN-LSTM", page_icon="🧠", layout="wide")
 
@@ -34,12 +35,8 @@ with st.sidebar:
     st.header("⚙️ Configuration")
     now_est = get_est_time()
     st.write(f"🕒 **EST:** {now_est.strftime('%H:%M:%S')}")
-    if is_sync_window():
-        st.success("✅ Sync Window Active")
-    else:
-        st.info("⏸️ Sync Window Inactive")
-
     st.divider()
+
     start_yr = st.slider("📅 Start Year", 2010, 2024, 2016)
     fee_bps = st.slider("💰 Fee (bps)", 0, 50, 10)
     lookback = st.slider("📐 Lookback (days)", 20, 60, 30, step=5)
@@ -87,9 +84,6 @@ with st.sidebar:
     st.write(f"**Macro:** {', '.join(summary['macro_found'])}")
     st.write(f"**T-bill col:** {'✅' if summary['tbill_found'] else '❌'}")
 
-    with st.expander("🔍 All columns"):
-        st.write(summary["all_cols"])
-
 if not run_button:
     st.info("👈 Configure parameters and click **🚀 Run All 3 Approaches**.")
     st.stop()
@@ -101,7 +95,7 @@ st.write(f"📅 **Data:** {df.index[0].strftime('%Y-%m-%d')} → {df.index[-1].s
 
 # ── Features & targets ────────────────────────────────────────────────────────
 try:
-    input_features, target_etfs, tbill_rate, df, col_info = get_features_and_targets(df)
+    input_features, target_etfs, tbill_rate, df, _ = get_features_and_targets(df)
 except ValueError as e:
     st.error(str(e))
     st.stop()
@@ -109,18 +103,6 @@ except ValueError as e:
 n_etfs = len(target_etfs)
 n_classes = n_etfs + (1 if include_cash else 0)
 
-# ── Show column detection diagnostics ─────────────────────────────────────────
-with st.expander("🔬 Column detection diagnostics", expanded=False):
-    st.write("**How each ETF column was interpreted:**")
-    for col, info in col_info.items():
-        st.write(f"- `{col}`: {info}")
-    st.write(f"**Input features ({len(input_features)}):** {input_features}")
-    st.write(f"**T-bill rate used:** {tbill_rate*100:.3f}%")
-
-    # Show sample return values to verify correctness
-    st.write("**Sample target return values (last 3 rows):**")
-    st.dataframe(df[target_etfs].tail(3))
-
 st.info(
     f"🎯 **Targets:** {', '.join([t.replace('_Ret','') for t in target_etfs])} · "
     f"**Features:** {len(input_features)} signals · "
@@ -131,19 +113,15 @@ st.info(
 X_raw = df[input_features].values.astype(np.float32)
 y_raw = df[target_etfs].values.astype(np.float32)
 
-# Fill NaNs
-col_means = np.nanmean(X_raw, axis=0)
 for j in range(X_raw.shape[1]):
     mask = np.isnan(X_raw[:, j])
     if mask.any():
-        X_raw[mask, j] = col_means[j]
+        X_raw[mask, j] = np.nanmean(X_raw[:, j])
 
-# Also fill NaNs in y_raw
-y_means = np.nanmean(y_raw, axis=0)
 for j in range(y_raw.shape[1]):
     mask = np.isnan(y_raw[:, j])
     if mask.any():
-        y_raw[mask, j] = y_means[j]
+        y_raw[mask, j] = np.nanmean(y_raw[:, j])
 
 X_seq, y_seq = build_sequences(X_raw, y_raw, lookback)
 y_labels = returns_to_labels(y_seq, include_cash=include_cash)
@@ -151,7 +129,7 @@ y_labels = returns_to_labels(y_seq, include_cash=include_cash)
 (X_train, y_train_r, X_val, y_val_r,
  X_test, y_test_r) = train_val_test_split(X_seq, y_seq, train_pct, val_pct)
 (_, y_train_l, _, y_val_l,
- _, y_test_l) = train_val_test_split(X_seq, y_labels, train_pct, val_pct)
+ _, _) = train_val_test_split(X_seq, y_labels, train_pct, val_pct)
 
 X_train_s, X_val_s, X_test_s, _ = scale_features(X_train, X_val, X_test)
 
@@ -163,14 +141,6 @@ test_slice = slice(test_start, test_start + len(X_test))
 
 st.success(f"✅ Sequences — Train: {train_size:,} · Val: {val_size:,} · Test: {len(X_test):,}")
 
-# Show class distribution to check for degenerate labels
-with st.expander("🔬 Label distribution (train set)", expanded=False):
-    unique, counts = np.unique(y_train_l, return_counts=True)
-    label_names = [target_etfs[i].replace("_Ret","") if i < n_etfs else "CASH" for i in unique]
-    dist_df = pd.DataFrame({"Class": label_names, "Count": counts,
-                            "Pct": (counts / counts.sum() * 100).round(1)})
-    st.dataframe(dist_df)
-
 # ── Train all three approaches ────────────────────────────────────────────────
 results = {}
 trained_info = {}
@@ -253,27 +223,49 @@ if winner_res is None:
 next_date = get_next_signal_date()
 st.divider()
 
+# ── Winner signal banner ──────────────────────────────────────────────────────
 show_signal_banner(winner_res["next_signal"], next_date, winner_name)
 
+# ── Conviction panel (winner only) ────────────────────────────────────────────
 winner_proba = trained_info[winner_name]["proba"]
 conviction = compute_conviction(winner_proba[-1], target_etfs, include_cash)
 show_conviction_panel(conviction)
 
 st.divider()
+
+# ── All models' next day signals ──────────────────────────────────────────────
+all_signals = {
+    name: {
+        "signal": res["next_signal"],
+        "proba": trained_info[name]["proba"][-1],
+        "is_winner": name == winner_name,
+    }
+    for name, res in results.items() if res is not None
+}
+show_all_signals_panel(all_signals, target_etfs, include_cash, next_date)
+
+st.divider()
+
+# ── Winner performance metrics ────────────────────────────────────────────────
 st.subheader(f"📊 {winner_name} — Performance Metrics")
 show_metrics_row(winner_res, tbill_rate)
 
 st.divider()
+
+# ── Comparison table ──────────────────────────────────────────────────────────
 st.subheader("🏆 Approach Comparison (Winner = Highest Raw Annualised Return)")
 comparison_df = build_comparison_table(results, winner_name)
 show_comparison_table(comparison_df)
-st.plotly_chart(comparison_bar_chart(results, winner_name), use_container_width=True)
 
 st.divider()
+
+# ── Equity curves ─────────────────────────────────────────────────────────────
 st.subheader("📈 Out-of-Sample Equity Curves — All Approaches vs Benchmarks")
 fig = equity_curve_chart(results, winner_name, test_dates, df, test_slice, tbill_rate)
 st.plotly_chart(fig, use_container_width=True)
 
 st.divider()
+
+# ── Audit trail (winner) ──────────────────────────────────────────────────────
 st.subheader(f"📋 Audit Trail — {winner_name} (Last 20 Trading Days)")
 show_audit_trail(winner_res["audit_trail"])
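build_sequences and returns_to_labels are used unchanged in this file and their bodies are not in the commit. A plausible sketch inferred from the call sites and from the indexing in app.py (test_start = lookback + train_size + val_size implies that sample i covers days [i, i+lookback) and targets day i+lookback); the CASH rule below is a guess, loudly so:

import numpy as np

def build_sequences(X, y, lookback):
    # Sample i is the window X[i : i+lookback] paired with the next day's
    # returns y[i+lookback] — consistent with test_start = lookback + train + val.
    xs = [X[i:i + lookback] for i in range(len(X) - lookback)]
    ys = [y[i + lookback] for i in range(len(X) - lookback)]
    return np.asarray(xs, np.float32), np.asarray(ys, np.float32)

def returns_to_labels(y_seq, include_cash=True):
    # Class label = index of the ETF with the highest next-day return.
    # Assumed CASH rule: if every ETF return is negative, use the extra
    # CASH class (index n_etfs) — consistent with n_classes = n_etfs + 1.
    labels = y_seq.argmax(axis=1)
    if include_cash:
        cash_class = y_seq.shape[1]
        labels = np.where((y_seq < 0).all(axis=1), cash_class, labels)
    return labels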
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/data/loader.py CHANGED
@@ -1,12 +1,8 @@
 """
 data/loader.py
 Loads master_data.parquet from HF Dataset.
-Validates freshness against the last NYSE trading day.
-No external pings — all data comes from HF Dataset only.
-
-Actual dataset columns (confirmed from parquet inspection):
-  ETFs  : AGG, GLD, SLV, SPY, TBT, TLT, VNQ
-  Macro : VIX, DXY, T10Y2Y, TBILL_3M, IG_SPREAD, HY_SPREAD
+Engineers rich feature set from raw price/macro columns.
+No external pings — all data from HF Dataset only.
 """
 
 import pandas as pd
@@ -22,9 +18,8 @@ try:
 except ImportError:
     NYSE_CAL_AVAILABLE = False
 
-DATASET_REPO = "P2SAMAPA/fi-etf-macro-signal-master-data"
-PARQUET_FILE = "master_data.parquet"
-
+DATASET_REPO = "P2SAMAPA/fi-etf-macro-signal-master-data"
+PARQUET_FILE = "master_data.parquet"
 TARGET_ETF_COLS = ["TLT", "TBT", "VNQ", "SLV", "GLD"]
 BENCHMARK_COLS = ["SPY", "AGG"]
 TBILL_COL = "TBILL_3M"
@@ -64,16 +59,13 @@ def load_dataset(hf_token: str) -> pd.DataFrame:
             token=hf_token,
         )
         df = pd.read_parquet(path)
-
         if not isinstance(df.index, pd.DatetimeIndex):
             for col in ["Date", "date", "DATE"]:
                 if col in df.columns:
                     df = df.set_index(col)
                     break
             df.index = pd.to_datetime(df.index)
-
         return df.sort_index()
-
     except Exception as e:
         st.error(f"❌ Failed to load dataset: {e}")
         return pd.DataFrame()
@@ -84,11 +76,9 @@ def load_dataset(hf_token: str) -> pd.DataFrame:
 def check_data_freshness(df: pd.DataFrame) -> dict:
     if df.empty:
         return {"fresh": False, "message": "Dataset is empty."}
-
     last = df.index[-1].date()
     expect = get_last_nyse_trading_day()
     fresh = last >= expect
-
     msg = (
         f"✅ Dataset up to date through **{last}**." if fresh else
         f"⚠️ **{expect}** data not yet updated. Latest: **{last}**. "
@@ -98,106 +88,139 @@ def check_data_freshness(df: pd.DataFrame) -> dict:
             "expected_date": expect, "message": msg}
 
 
-# ── Detect whether a column holds prices or returns ───────────────────────────
+# ── Price returns ─────────────────────────────────────────────────────────────
 
-def _is_price_series(series: pd.Series) -> bool:
-    """
-    Heuristic: a price series has abs(median) > 2 and std/mean < 0.5.
-    A return series has abs(median) < 0.1 and many values near zero.
-    """
+def _to_returns(series: pd.Series) -> pd.Series:
+    """Convert price series to daily pct returns. If already returns, pass through."""
     clean = series.dropna()
     if len(clean) == 0:
-        return False
-    med = abs(clean.median())
-    # Strong price signal: median > 2 (e.g. TLT ~ 90, TBT ~ 20)
-    if med > 2:
-        return True
-    # Strong return signal: most values between -0.2 and 0.2
-    if (clean.abs() < 0.2).mean() > 0.9:
-        return False
-    return med > 0.5
+        return series
+    if abs(clean.median()) > 2:  # price series
+        return series.pct_change()
+    return series  # already returns
+
+
+# ── Feature engineering ───────────────────────────────────────────────────────
+
+def _engineer_features(df: pd.DataFrame, ret_cols: list) -> pd.DataFrame:
+    """
+    Build a rich feature set from raw macro + ETF return columns.
+
+    Features added per ETF return:
+      - 1d, 5d, 21d lagged returns
+      - 5d, 21d rolling volatility
+      - 5d, 21d momentum (cumulative return)
+
+    Features added per macro column:
+      - raw value (z-scored over rolling 252d window)
+      - 5d change
+      - 1d lag
+
+    Also adds:
+      - TBILL_3M as a feature (rate level)
+      - VIX regime flag (VIX > 25)
+      - Yield curve slope (already T10Y2Y)
+      - Cross-asset momentum: spread between TLT_ret and TBT_ret
+    """
+    feat = pd.DataFrame(index=df.index)
+
+    # ── ETF return features ───────────────────────────────────────────────────
+    for col in ret_cols:
+        r = df[col]
+        feat[f"{col}_lag1"] = r.shift(1)
+        feat[f"{col}_lag5"] = r.shift(5)
+        feat[f"{col}_lag21"] = r.shift(21)
+        feat[f"{col}_vol5"] = r.rolling(5).std()
+        feat[f"{col}_vol21"] = r.rolling(21).std()
+        feat[f"{col}_mom5"] = r.rolling(5).sum()
+        feat[f"{col}_mom21"] = r.rolling(21).sum()
+
+    # ── Macro features ────────────────────────────────────────────────────────
+    for col in MACRO_COLS:
+        if col not in df.columns:
+            continue
+        s = df[col]
+        # Z-score over rolling 252-day window
+        roll_mean = s.rolling(252, min_periods=63).mean()
+        roll_std = s.rolling(252, min_periods=63).std()
+        feat[f"{col}_z"] = (s - roll_mean) / (roll_std + 1e-9)
+        feat[f"{col}_chg5"] = s.diff(5)
+        feat[f"{col}_lag1"] = s.shift(1)
+
+    # ── TBILL level ───────────────────────────────────────────────────────────
+    if TBILL_COL in df.columns:
+        tbill = df[TBILL_COL]
+        feat["TBILL_level"] = tbill
+        feat["TBILL_chg5"] = tbill.diff(5)
+
+    # ── Derived cross-asset signals ───────────────────────────────────────────
+    if "TLT_Ret" in df.columns and "TBT_Ret" in df.columns:
+        feat["TLT_TBT_spread_mom5"] = (
+            df["TLT_Ret"].rolling(5).sum() - df["TBT_Ret"].rolling(5).sum()
+        )
+
+    if "VIX" in df.columns:
+        feat["VIX_regime"] = (df["VIX"] > 25).astype(float)
+        feat["VIX_mom5"] = df["VIX"].diff(5)
+
+    if "T10Y2Y" in df.columns:
+        feat["YC_inverted"] = (df["T10Y2Y"] < 0).astype(float)
 
+    if "IG_SPREAD" in df.columns and "HY_SPREAD" in df.columns:
+        feat["credit_ratio"] = df["HY_SPREAD"] / (df["IG_SPREAD"] + 1e-9)
 
-# ── Feature / target extraction ───────────────────────────────────────────────
+    return feat
+
+
+# ── Main extraction function ──────────────────────────────────────────────────
 
 def get_features_and_targets(df: pd.DataFrame):
     """
-    Build return columns for target ETFs and benchmarks.
-    Auto-detects whether source columns are prices or already returns.
+    Build return columns for target ETFs and engineer a rich feature set.
 
     Returns:
         input_features : list[str]
         target_etfs    : list[str]  e.g. ["TLT_Ret", ...]
         tbill_rate     : float
-        df             : DataFrame with _Ret columns added
-        col_info       : dict of diagnostics for sidebar display
+        df_out         : DataFrame with all columns
+        col_info       : dict of diagnostics
     """
     missing = [c for c in TARGET_ETF_COLS if c not in df.columns]
     if missing:
         raise ValueError(
             f"Missing ETF columns: {missing}. "
-            f"Found in dataset: {list(df.columns)}"
+            f"Found: {list(df.columns)}"
         )
 
     col_info = {}
 
-    # ── Build _Ret columns ────────────────────────────────────────────────────
-    def make_ret(col):
+    # ── Build ETF return columns ──────────────────────────────────────────────
+    target_etfs = []
+    for col in TARGET_ETF_COLS:
         ret_col = f"{col}_Ret"
-        if ret_col in df.columns:
-            col_info[col] = "pre-computed _Ret"
-            return ret_col
-        if _is_price_series(df[col]):
-            df[ret_col] = df[col].pct_change()
-            col_info[col] = f"price→pct_change (median={df[col].median():.2f})"
-        else:
-            df[ret_col] = df[col]
-            col_info[col] = f"used as-is (median={df[col].median():.4f})"
-        return ret_col
-
-    target_etfs = [make_ret(c) for c in TARGET_ETF_COLS]
-    benchmark_rets = [make_ret(c) for c in BENCHMARK_COLS if c in df.columns]
-
-    # Drop NaN rows (first row from pct_change)
+        df[ret_col] = _to_returns(df[col])
+        med = abs(df[col].dropna().median())
+        col_info[col] = f"price→pct_change (median={med:.2f})" if med > 2 else f"used as-is (median={med:.4f})"
+        target_etfs.append(ret_col)
+
+    # ── Build benchmark return columns ────────────────────────────────────────
+    for col in BENCHMARK_COLS:
+        if col in df.columns:
+            df[f"{col}_Ret"] = _to_returns(df[col])
+
+    # ── Drop NaN from first pct_change row ────────────────────────────────────
     df = df.dropna(subset=target_etfs).copy()
 
-    # Sanity check: target returns should be small daily values
-    for ret_col in target_etfs:
-        med = df[ret_col].abs().median()
-        if med > 0.1:
-            st.warning(
-                f"⚠️ {ret_col} has median absolute value {med:.4f} — "
-                f"these may not be daily returns. Check dataset column '{ret_col.replace('_Ret','')}'. "
-                f"Sample values: {df[ret_col].tail(3).values}"
-            )
-
-    # ── Input features ────────────────────────────────────────────────────────
-    exclude = set(
-        TARGET_ETF_COLS + BENCHMARK_COLS + target_etfs + benchmark_rets +
-        [f"{c}_Ret" for c in BENCHMARK_COLS] + [TBILL_COL]
-    )
-
-    # First try known macro columns
-    input_features = [c for c in MACRO_COLS if c in df.columns and c not in exclude]
-
-    # Then add any engineered signal columns
-    extra = [
-        c for c in df.columns
-        if c not in exclude
-        and c not in input_features
-        and any(k in c for k in ["_Z", "_Vol", "Regime", "YC_", "Credit_",
-                                 "Rates_", "VIX_", "Spread", "DXY", "T10Y",
-                                 "TBILL", "SOFR", "MOVE"])
-        and pd.api.types.is_numeric_dtype(df[c])
-    ]
-    input_features += extra
-
-    # Fallback: all numeric non-excluded columns
-    if not input_features:
-        input_features = [
-            c for c in df.columns
-            if c not in exclude and pd.api.types.is_numeric_dtype(df[c])
-        ]
+    # ── Engineer features ─────────────────────────────────────────────────────
+    feat_df = _engineer_features(df, target_etfs)
+
+    # Merge features into df
+    for col in feat_df.columns:
+        df[col] = feat_df[col].values
+
+    # Drop rows with NaN in features (from lags/rolling)
+    feat_cols = list(feat_df.columns)
+    df = df.dropna(subset=feat_cols).copy()
 
     # ── T-bill rate ───────────────────────────────────────────────────────────
     tbill_rate = 0.045
@@ -207,6 +230,14 @@ def get_features_and_targets(df: pd.DataFrame):
         v = float(raw.iloc[-1])
         tbill_rate = v / 100 if v > 1 else v
 
+    # Input features = all engineered feature columns
+    exclude = set(
+        TARGET_ETF_COLS + BENCHMARK_COLS + target_etfs +
+        [f"{c}_Ret" for c in BENCHMARK_COLS] + [TBILL_COL] +
+        list(MACRO_COLS)
+    )
+    input_features = [c for c in feat_cols if c not in exclude]
+
    return input_features, target_etfs, tbill_rate, df, col_info
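The new _to_returns heuristic collapses the old multi-branch _is_price_series into a single median threshold. A toy check — the function body is copied verbatim from the diff above; only the demo data is made up:

import pandas as pd

def _to_returns(series: pd.Series) -> pd.Series:
    clean = series.dropna()
    if len(clean) == 0:
        return series
    if abs(clean.median()) > 2:  # price series
        return series.pct_change()
    return series  # already returns

prices = pd.Series([90.0, 91.0, 89.5, 92.0])     # price-like (median 90.5)
rets = pd.Series([0.002, -0.011, 0.004, 0.001])  # return-like (median 0.0015)
print(_to_returns(prices).round(4).tolist())  # [nan, 0.0111, -0.0165, 0.0279]
print(_to_returns(rets).tolist())             # unchanged: passes through

One consequence worth flagging: the threshold assumes every return column is already in decimal units — a return-like series quoted in percentage points near, say, 5 would also clear the median > 2 bar and get pct_change'd.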
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py CHANGED
@@ -9,7 +9,6 @@ import streamlit as st
 import pandas as pd
 import numpy as np
 
-# ── Module imports ────────────────────────────────────────────────────────────
 from data.loader import (load_dataset, check_data_freshness,
                          get_features_and_targets, dataset_summary)
 from utils.calendar import get_est_time, is_sync_window, get_next_signal_date
@@ -26,20 +25,13 @@ from ui.components import (
 )
 from ui.charts import equity_curve_chart, comparison_bar_chart
 
-# ── Page config ───────────────────────────────────────────────────────────────
-st.set_page_config(
-    page_title="P2-ETF-CNN-LSTM",
-    page_icon="🧠",
-    layout="wide",
-)
 
-# ── Secrets ───────────────────────────────────────────────────────────────────
 HF_TOKEN = os.getenv("HF_TOKEN", "")
 
 # ── Sidebar ───────────────────────────────────────────────────────────────────
 with st.sidebar:
     st.header("⚙️ Configuration")
-
     now_est = get_est_time()
     st.write(f"🕒 **EST:** {now_est.strftime('%H:%M:%S')}")
     if is_sync_window():
@@ -48,25 +40,19 @@ with st.sidebar:
         st.info("⏸️ Sync Window Inactive")
 
     st.divider()
-
     start_yr = st.slider("📅 Start Year", 2010, 2024, 2016)
     fee_bps = st.slider("💰 Fee (bps)", 0, 50, 10)
     lookback = st.slider("📐 Lookback (days)", 20, 60, 30, step=5)
    epochs = st.number_input("🔁 Max Epochs", 20, 300, 100, step=10)
 
     st.divider()
-
     split_option = st.selectbox("📊 Train/Val/Test Split", ["70/15/15", "80/10/10"], index=0)
-    split_map = {"70/15/15": (0.70, 0.15), "80/10/10": (0.80, 0.10)}
-    train_pct, val_pct = split_map[split_option]
 
-    include_cash = st.checkbox(
-        "💵 Include CASH class", value=True,
-        help="Model can select CASH (earns T-bill rate) instead of any ETF",
-    )
 
     st.divider()
-
     run_button = st.button("🚀 Run All 3 Approaches", type="primary", use_container_width=True)
 
 # ── Title ─────────────────────────────────────────────────────────────────────
@@ -74,9 +60,8 @@ st.title("🧠 P2-ETF-CNN-LSTM")
 st.caption("Approach 1: Wavelet · Approach 2: Regime-Conditioned · Approach 3: Multi-Scale Parallel")
 st.caption("Winner selected by highest raw annualised return on out-of-sample test set.")
 
-# ── Token check ───────────────────────────────────────────────────────────────
 if not HF_TOKEN:
-    st.error("❌ HF_TOKEN secret not found. Add it to HF Space / GitHub secrets.")
     st.stop()
 
 # ── Load dataset ──────────────────────────────────────────────────────────────
@@ -86,11 +71,10 @@ with st.spinner("📡 Loading dataset from HuggingFace..."):
 if df_raw.empty:
     st.stop()
 
-# ── Freshness check ───────────────────────────────────────────────────────────
 freshness = check_data_freshness(df_raw)
 show_freshness_status(freshness)
 
-# ── Dataset summary in sidebar ────────────────────────────────────────────────
 with st.sidebar:
     st.divider()
     st.subheader("📦 Dataset Info")
@@ -103,21 +87,21 @@ with st.sidebar:
     st.write(f"**Macro:** {', '.join(summary['macro_found'])}")
     st.write(f"**T-bill col:** {'✅' if summary['tbill_found'] else '❌'}")
 
-# ── Wait for run button ───────────────────────────────────────────────────────
 if not run_button:
-    st.info("👈 Configure parameters in the sidebar and click **🚀 Run All 3 Approaches**.")
     st.stop()
 
 # ── Filter by start year ──────────────────────────────────────────────────────
 df = df_raw[df_raw.index.year >= start_yr].copy()
-st.write(
-    f"📅 **Data:** {df.index[0].strftime('%Y-%m-%d')} → {df.index[-1].strftime('%Y-%m-%d')} "
-    f"({df.index[-1].year - df.index[0].year + 1} years)"
-)
 
 # ── Features & targets ────────────────────────────────────────────────────────
 try:
-    input_features, target_etfs, tbill_rate, df = get_features_and_targets(df)
 except ValueError as e:
     st.error(str(e))
     st.stop()
@@ -125,6 +109,18 @@ except ValueError as e:
 n_etfs = len(target_etfs)
 n_classes = n_etfs + (1 if include_cash else 0)
 
 st.info(
     f"🎯 **Targets:** {', '.join([t.replace('_Ret','') for t in target_etfs])} · "
     f"**Features:** {len(input_features)} signals · "
@@ -135,13 +131,20 @@ st.info(
 X_raw = df[input_features].values.astype(np.float32)
 y_raw = df[target_etfs].values.astype(np.float32)
 
-# Fill any remaining NaNs with column means
 col_means = np.nanmean(X_raw, axis=0)
 for j in range(X_raw.shape[1]):
     mask = np.isnan(X_raw[:, j])
     if mask.any():
         X_raw[mask, j] = col_means[j]
 
 X_seq, y_seq = build_sequences(X_raw, y_raw, lookback)
 y_labels = returns_to_labels(y_seq, include_cash=include_cash)
@@ -154,27 +157,30 @@ X_train_s, X_val_s, X_test_s, _ = scale_features(X_train, X_val, X_test)
 
 train_size = len(X_train)
 val_size = len(X_val)
-
 test_start = lookback + train_size + val_size
 test_dates = df.index[test_start: test_start + len(X_test)]
 test_slice = slice(test_start, test_start + len(X_test))
 
-st.success(
-    f"✅ Sequences — Train: {train_size:,} · Val: {val_size:,} · Test: {len(X_test):,}"
-)
 
 # ── Train all three approaches ────────────────────────────────────────────────
 results = {}
 trained_info = {}
 
-progress = st.progress(0, text="Starting training...")
-
-# ── Approach 1 ────────────────────────────────────────────────────────────────
 with st.spinner("🌊 Training Approach 1 — Wavelet CNN-LSTM..."):
     try:
         model1, hist1, _ = train_approach1(
-            X_train_s, y_train_l,
-            X_val_s, y_val_l,
             n_classes=n_classes, epochs=int(epochs),
         )
         preds1, proba1 = predict_approach1(model1, X_test_s)
@@ -190,17 +196,13 @@ with st.spinner("🌊 Training Approach 1 — Wavelet CNN-LSTM..."):
 
 progress.progress(33, text="Approach 1 done...")
 
-# ── Approach 2 ────────────────────────────────────────────────────────────────
 with st.spinner("🔀 Training Approach 2 — Regime-Conditioned CNN-LSTM..."):
     try:
         model2, hist2, hmm2, regime_cols2 = train_approach2(
-            X_train_s, y_train_l,
-            X_val_s, y_val_l,
-            X_flat_all=X_raw,
-            feature_names=input_features,
-            lookback=lookback,
-            train_size=train_size,
-            val_size=val_size,
             n_classes=n_classes, epochs=int(epochs),
         )
         preds2, proba2 = predict_approach2(
@@ -219,12 +221,11 @@ with st.spinner("🔀 Training Approach 2 — Regime-Conditioned CNN-LSTM..."):
 
 progress.progress(66, text="Approach 2 done...")
 
-# ── Approach 3 ────────────────────────────────────────────────────────────────
 with st.spinner("📡 Training Approach 3 — Multi-Scale CNN-LSTM..."):
     try:
         model3, hist3 = train_approach3(
-            X_train_s, y_train_l,
-            X_val_s, y_val_l,
             n_classes=n_classes, epochs=int(epochs),
         )
         preds3, proba3 = predict_approach3(model3, X_test_s)
@@ -250,41 +251,29 @@ if winner_res is None:
     st.stop()
 
 next_date = get_next_signal_date()
-
 st.divider()
 
-# ── Signal banner ─────────────────────────────────────────────────────────────
 show_signal_banner(winner_res["next_signal"], next_date, winner_name)
 
-# ── Conviction panel ──────────────────────────────────────────────────────────
 winner_proba = trained_info[winner_name]["proba"]
 conviction = compute_conviction(winner_proba[-1], target_etfs, include_cash)
 show_conviction_panel(conviction)
 
 st.divider()
-
-# ── Winner metrics ────────────────────────────────────────────────────────────
 st.subheader(f"📊 {winner_name} — Performance Metrics")
 show_metrics_row(winner_res, tbill_rate)
 
 st.divider()
-
-# ── Comparison table ──────────────────────────────────────────────────────────
 st.subheader("🏆 Approach Comparison (Winner = Highest Raw Annualised Return)")
 comparison_df = build_comparison_table(results, winner_name)
 show_comparison_table(comparison_df)
-
 st.plotly_chart(comparison_bar_chart(results, winner_name), use_container_width=True)
 
 st.divider()
-
-# ── Equity curves ─────────────────────────────────────────────────────────────
 st.subheader("📈 Out-of-Sample Equity Curves — All Approaches vs Benchmarks")
 fig = equity_curve_chart(results, winner_name, test_dates, df, test_slice, tbill_rate)
 st.plotly_chart(fig, use_container_width=True)
 
 st.divider()
-
-# ── Audit trail ───────────────────────────────────────────────────────────────
289
  st.subheader(f"📋 Audit Trail — {winner_name} (Last 20 Trading Days)")
290
  show_audit_trail(winner_res["audit_trail"])
 
 import pandas as pd
 import numpy as np

 from data.loader import (load_dataset, check_data_freshness,
                          get_features_and_targets, dataset_summary)
 from utils.calendar import get_est_time, is_sync_window, get_next_signal_date
 )
 from ui.charts import equity_curve_chart, comparison_bar_chart

+st.set_page_config(page_title="P2-ETF-CNN-LSTM", page_icon="🧠", layout="wide")

 HF_TOKEN = os.getenv("HF_TOKEN", "")

 # ── Sidebar ───────────────────────────────────────────────────────────────────
 with st.sidebar:
     st.header("⚙️ Configuration")
     now_est = get_est_time()
     st.write(f"🕒 **EST:** {now_est.strftime('%H:%M:%S')}")
     if is_sync_window():
     st.info("⏸️ Sync Window Inactive")

     st.divider()
     start_yr = st.slider("📅 Start Year", 2010, 2024, 2016)
     fee_bps = st.slider("💰 Fee (bps)", 0, 50, 10)
     lookback = st.slider("📐 Lookback (days)", 20, 60, 30, step=5)
     epochs = st.number_input("🔁 Max Epochs", 20, 300, 100, step=10)

     st.divider()
     split_option = st.selectbox("📊 Train/Val/Test Split", ["70/15/15", "80/10/10"], index=0)
+    train_pct, val_pct = {"70/15/15": (0.70, 0.15), "80/10/10": (0.80, 0.10)}[split_option]

+    include_cash = st.checkbox("💵 Include CASH class", value=True,
+                               help="Model can select CASH (earns T-bill rate) instead of any ETF")

     st.divider()
     run_button = st.button("🚀 Run All 3 Approaches", type="primary", use_container_width=True)

 # ── Title ─────────────────────────────────────────────────────────────────────
 st.caption("Approach 1: Wavelet · Approach 2: Regime-Conditioned · Approach 3: Multi-Scale Parallel")
 st.caption("Winner selected by highest raw annualised return on out-of-sample test set.")

 if not HF_TOKEN:
+    st.error("❌ HF_TOKEN secret not found.")
     st.stop()

 # ── Load dataset ──────────────────────────────────────────────────────────────
 if df_raw.empty:
     st.stop()

 freshness = check_data_freshness(df_raw)
 show_freshness_status(freshness)

+# ── Dataset info sidebar ──────────────────────────────────────────────────────
 with st.sidebar:
     st.divider()
     st.subheader("📦 Dataset Info")
     st.write(f"**Macro:** {', '.join(summary['macro_found'])}")
     st.write(f"**T-bill col:** {'✅' if summary['tbill_found'] else '❌'}")

+    with st.expander("🔍 All columns"):
+        st.write(summary["all_cols"])
+
 if not run_button:
+    st.info("👈 Configure parameters and click **🚀 Run All 3 Approaches**.")
     st.stop()

 # ── Filter by start year ──────────────────────────────────────────────────────
 df = df_raw[df_raw.index.year >= start_yr].copy()
+st.write(f"📅 **Data:** {df.index[0].strftime('%Y-%m-%d')} → {df.index[-1].strftime('%Y-%m-%d')} "
+         f"({df.index[-1].year - df.index[0].year + 1} years)")

 # ── Features & targets ────────────────────────────────────────────────────────
 try:
+    input_features, target_etfs, tbill_rate, df, col_info = get_features_and_targets(df)
 except ValueError as e:
     st.error(str(e))
     st.stop()

 n_etfs = len(target_etfs)
 n_classes = n_etfs + (1 if include_cash else 0)

+# ── Show column detection diagnostics ────────────────────────────────────────
+with st.expander("🔬 Column detection diagnostics", expanded=False):
+    st.write("**How each ETF column was interpreted:**")
+    for col, info in col_info.items():
+        st.write(f"- `{col}`: {info}")
+    st.write(f"**Input features ({len(input_features)}):** {input_features}")
+    st.write(f"**T-bill rate used:** {tbill_rate*100:.3f}%")
+
+    # Show sample return values to verify correctness
+    st.write("**Sample target return values (last 3 rows):**")
+    st.dataframe(df[target_etfs].tail(3))
+
 st.info(
     f"🎯 **Targets:** {', '.join([t.replace('_Ret','') for t in target_etfs])} · "
     f"**Features:** {len(input_features)} signals · "

 X_raw = df[input_features].values.astype(np.float32)
 y_raw = df[target_etfs].values.astype(np.float32)

+# Fill NaNs
 col_means = np.nanmean(X_raw, axis=0)
 for j in range(X_raw.shape[1]):
     mask = np.isnan(X_raw[:, j])
     if mask.any():
         X_raw[mask, j] = col_means[j]

+# Also fill NaNs in y_raw
+y_means = np.nanmean(y_raw, axis=0)
+for j in range(y_raw.shape[1]):
+    mask = np.isnan(y_raw[:, j])
+    if mask.any():
+        y_raw[mask, j] = y_means[j]
+
 X_seq, y_seq = build_sequences(X_raw, y_raw, lookback)
 y_labels = returns_to_labels(y_seq, include_cash=include_cash)

 train_size = len(X_train)
 val_size = len(X_val)
 test_start = lookback + train_size + val_size
 test_dates = df.index[test_start: test_start + len(X_test)]
 test_slice = slice(test_start, test_start + len(X_test))

+st.success(f"✅ Sequences — Train: {train_size:,} · Val: {val_size:,} · Test: {len(X_test):,}")
+
+# Show class distribution to check for degenerate labels
+with st.expander("🔬 Label distribution (train set)", expanded=False):
+    unique, counts = np.unique(y_train_l, return_counts=True)
+    label_names = [target_etfs[i].replace("_Ret","") if i < n_etfs else "CASH" for i in unique]
+    dist_df = pd.DataFrame({"Class": label_names, "Count": counts,
+                            "Pct": (counts / counts.sum() * 100).round(1)})
+    st.dataframe(dist_df)

 # ── Train all three approaches ────────────────────────────────────────────────
 results = {}
 trained_info = {}
+progress = st.progress(0, text="Starting training...")

+# Approach 1
 with st.spinner("🌊 Training Approach 1 — Wavelet CNN-LSTM..."):
     try:
         model1, hist1, _ = train_approach1(
+            X_train_s, y_train_l, X_val_s, y_val_l,
             n_classes=n_classes, epochs=int(epochs),
         )
         preds1, proba1 = predict_approach1(model1, X_test_s)

 progress.progress(33, text="Approach 1 done...")

+# Approach 2
 with st.spinner("🔀 Training Approach 2 — Regime-Conditioned CNN-LSTM..."):
     try:
         model2, hist2, hmm2, regime_cols2 = train_approach2(
+            X_train_s, y_train_l, X_val_s, y_val_l,
+            X_flat_all=X_raw, feature_names=input_features,
+            lookback=lookback, train_size=train_size, val_size=val_size,
             n_classes=n_classes, epochs=int(epochs),
         )
         preds2, proba2 = predict_approach2(

 progress.progress(66, text="Approach 2 done...")

+# Approach 3
 with st.spinner("📡 Training Approach 3 — Multi-Scale CNN-LSTM..."):
     try:
         model3, hist3 = train_approach3(
+            X_train_s, y_train_l, X_val_s, y_val_l,
             n_classes=n_classes, epochs=int(epochs),
         )
         preds3, proba3 = predict_approach3(model3, X_test_s)

     st.stop()

 next_date = get_next_signal_date()
 st.divider()

 show_signal_banner(winner_res["next_signal"], next_date, winner_name)

 winner_proba = trained_info[winner_name]["proba"]
 conviction = compute_conviction(winner_proba[-1], target_etfs, include_cash)
 show_conviction_panel(conviction)

 st.divider()
 st.subheader(f"📊 {winner_name} — Performance Metrics")
 show_metrics_row(winner_res, tbill_rate)

 st.divider()
 st.subheader("🏆 Approach Comparison (Winner = Highest Raw Annualised Return)")
 comparison_df = build_comparison_table(results, winner_name)
 show_comparison_table(comparison_df)
 st.plotly_chart(comparison_bar_chart(results, winner_name), use_container_width=True)

 st.divider()
 st.subheader("📈 Out-of-Sample Equity Curves — All Approaches vs Benchmarks")
 fig = equity_curve_chart(results, winner_name, test_dates, df, test_slice, tbill_rate)
 st.plotly_chart(fig, use_container_width=True)

 st.divider()
 st.subheader(f"📋 Audit Trail — {winner_name} (Last 20 Trading Days)")
 show_audit_trail(winner_res["audit_trail"])
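Note on the label-distribution expander above: `returns_to_labels` lives in `models/base.py`, which is not part of this sync, so the mapping from next-day returns to class ids can only be inferred. The sketch below is a plausible reading consistent with the app's own `label_names` mapping (ETF index for classes below `n_etfs`, CASH at index `n_etfs`); the argmax rule is an assumption, not the confirmed implementation:

```python
import numpy as np

def returns_to_labels(y_seq: np.ndarray, include_cash: bool = True) -> np.ndarray:
    # Assumed rule: label each day with the best-performing ETF's index;
    # if include_cash and every ETF return is negative, use the extra
    # CASH class (index n_etfs), matching the app's label_names mapping.
    labels = y_seq.argmax(axis=1)
    if include_cash:
        all_negative = (y_seq < 0).all(axis=1)
        labels = np.where(all_negative, y_seq.shape[1], labels)
    return labels

y = np.array([[0.01, -0.02], [-0.01, -0.03]])
print(returns_to_labels(y))  # [0 2] → ETF 0 on day 1, CASH on day 2
```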
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/data/loader.py CHANGED
@@ -4,7 +4,7 @@ Loads master_data.parquet from HF Dataset.
 Validates freshness against the last NYSE trading day.
 No external pings — all data comes from HF Dataset only.

-Actual dataset columns (from parquet inspection):
   ETFs  : AGG, GLD, SLV, SPY, TBT, TLT, VNQ
   Macro : VIX, DXY, T10Y2Y, TBILL_3M, IG_SPREAD, HY_SPREAD
 """
@@ -15,7 +15,6 @@ import streamlit as st
 from huggingface_hub import hf_hub_download
 from datetime import datetime, timedelta
 import pytz
-import os

 try:
     import pandas_market_calendars as mcal
@@ -26,33 +25,27 @@ except ImportError:
 DATASET_REPO = "P2SAMAPA/fi-etf-macro-signal-master-data"
 PARQUET_FILE = "master_data.parquet"

-# ── Actual column names in the dataset ───────────────────────────────────────
-TARGET_ETF_COLS = ["TLT", "TBT", "VNQ", "SLV", "GLD"]   # traded ETFs
-BENCHMARK_COLS  = ["SPY", "AGG"]                        # chart only
-TBILL_COL       = "TBILL_3M"                            # 3m T-bill rate
-MACRO_COLS      = ["VIX", "DXY", "T10Y2Y", "IG_SPREAD", "HY_SPREAD"]


-# ── NYSE calendar helpers ─────────────────────────────────────────────────────

 def get_last_nyse_trading_day(as_of=None):
-    """Return the most recent NYSE trading day on or before as_of (default: today EST)."""
     est = pytz.timezone("US/Eastern")
     if as_of is None:
         as_of = datetime.now(est)
     today = as_of.date()
-
     if NYSE_CAL_AVAILABLE:
         try:
             nyse = mcal.get_calendar("NYSE")
-            start = today - timedelta(days=10)
-            sched = nyse.schedule(start_date=start, end_date=today)
             if len(sched) > 0:
                 return sched.index[-1].date()
         except Exception:
             pass
-
-    # Fallback: skip weekends
     candidate = today
     while candidate.weekday() >= 5:
         candidate -= timedelta(days=1)
@@ -63,10 +56,6 @@ def get_last_nyse_trading_day(as_of=None):

 @st.cache_data(ttl=3600, show_spinner=False)
 def load_dataset(hf_token: str) -> pd.DataFrame:
-    """
-    Download master_data.parquet from HF Dataset and return as DataFrame.
-    Cached for 1 hour. Index is parsed as DatetimeIndex.
-    """
     try:
         path = hf_hub_download(
             repo_id=DATASET_REPO,
@@ -76,7 +65,6 @@ def load_dataset(hf_token: str) -> pd.DataFrame:
         )
         df = pd.read_parquet(path)

-        # Ensure DatetimeIndex
         if not isinstance(df.index, pd.DatetimeIndex):
             for col in ["Date", "date", "DATE"]:
                 if col in df.columns:
@@ -84,66 +72,66 @@ def load_dataset(hf_token: str) -> pd.DataFrame:
                     break
             df.index = pd.to_datetime(df.index)

-        df = df.sort_index()
-        return df

     except Exception as e:
-        st.error(f"❌ Failed to load dataset from HuggingFace: {e}")
         return pd.DataFrame()


 # ── Freshness check ───────────────────────────────────────────────────────────

 def check_data_freshness(df: pd.DataFrame) -> dict:
-    """
-    Check whether the dataset contains data for the last NYSE trading day.
-    """
     if df.empty:
-        return {
-            "fresh": False,
-            "last_date_in_data": None,
-            "expected_date": None,
-            "message": "Dataset is empty.",
-        }
-
-    last_date_in_data = df.index[-1].date()
-    expected_date = get_last_nyse_trading_day()
-    fresh = last_date_in_data >= expected_date
-
-    if fresh:
-        message = f"✅ Dataset is up to date through **{last_date_in_data}**."
-    else:
-        message = (
-            f"⚠️ **{expected_date}** data not yet updated in dataset. "
-            f"Latest available: **{last_date_in_data}**. "
-            f"Please check back later — the dataset updates daily after market close."
-        )

-    return {
-        "fresh": fresh,
-        "last_date_in_data": last_date_in_data,
-        "expected_date": expected_date,
-        "message": message,
-    }


 # ── Feature / target extraction ───────────────────────────────────────────────

 def get_features_and_targets(df: pd.DataFrame):
     """
-    Extract input feature columns and target ETF return columns.
-
-    The dataset stores raw price or return values directly under ticker names.
-    We compute daily log returns for target ETFs if they are not already returns.

     Returns:
-        input_features : list of column names to use as model inputs
-        target_etfs    : list of ETF column names (after return computation)
-        tbill_rate     : latest 3m T-bill rate as float (annualised, e.g. 0.045)
-        df             : DataFrame (possibly with new _Ret columns added)
     """
-
-    # ── Confirm target ETFs exist ─────────────────────────────────────────────
     missing = [c for c in TARGET_ETF_COLS if c not in df.columns]
     if missing:
         raise ValueError(
@@ -151,71 +139,75 @@ def get_features_and_targets(df: pd.DataFrame):
             f"Found in dataset: {list(df.columns)}"
         )

-    # ── Build return columns ──────────────────────────────────────────────────
-    # If values look like prices (>5), compute pct returns.
-    # If they already look like small returns (<1 in abs), use as-is.
-    target_etfs = []
-    for col in TARGET_ETF_COLS:
-        ret_col = f"{col}_Ret"
-        if ret_col not in df.columns:
-            sample = df[col].dropna()
-            if len(sample) > 0 and abs(sample.median()) > 1:
-                # Looks like price — compute pct change
-                df[ret_col] = df[col].pct_change()
-            else:
-                # Already returns
-                df[ret_col] = df[col]
-        target_etfs.append(ret_col)
-
-    # Same for benchmarks
-    for col in BENCHMARK_COLS:
-        ret_col = f"{col}_Ret"
-        if ret_col not in df.columns and col in df.columns:
-            sample = df[col].dropna()
-            if len(sample) > 0 and abs(sample.median()) > 1:
-                df[ret_col] = df[col].pct_change()
-            else:
-                df[ret_col] = df[col]

-    # Drop rows with NaN in target columns (first row after pct_change)
-    df = df.dropna(subset=target_etfs)

     # ── Input features ────────────────────────────────────────────────────────
-    # Use macro columns directly; exclude ETF price/return cols and benchmarks
     exclude = set(
-        TARGET_ETF_COLS + BENCHMARK_COLS +
-        target_etfs +
-        [f"{c}_Ret" for c in BENCHMARK_COLS] +
-        [TBILL_COL]
     )

-    input_features = [
         c for c in df.columns
         if c not in exclude
-        and c in (MACRO_COLS + [
-            col for col in df.columns
-            if any(k in col for k in ["_Z", "_Vol", "Regime", "YC_", "Credit_",
-                                      "Rates_", "VIX_", "Spread", "DXY", "T10Y"])
-        ])
     ]

-    # Fallback: if none matched, use all non-excluded numeric columns
     if not input_features:
         input_features = [
             c for c in df.columns
-            if c not in exclude
-            and pd.api.types.is_numeric_dtype(df[c])
         ]

     # ── T-bill rate ───────────────────────────────────────────────────────────
-    tbill_rate = 0.045  # default
     if TBILL_COL in df.columns:
         raw = df[TBILL_COL].dropna()
         if len(raw) > 0:
-            last_val = float(raw.iloc[-1])
-            tbill_rate = last_val / 100 if last_val > 1 else last_val

-    return input_features, target_etfs, tbill_rate, df


 # ── Dataset summary ───────────────────────────────────────────────────────────
@@ -228,8 +220,9 @@ def dataset_summary(df: pd.DataFrame) -> dict:
         "columns": len(df.columns),
         "start_date": df.index[0].strftime("%Y-%m-%d"),
         "end_date": df.index[-1].strftime("%Y-%m-%d"),
-        "etfs_found": [c for c in TARGET_ETF_COLS if c in df.columns],
-        "benchmarks": [c for c in BENCHMARK_COLS if c in df.columns],
-        "macro_found": [c for c in MACRO_COLS if c in df.columns],
         "tbill_found": TBILL_COL in df.columns,
     }
 Validates freshness against the last NYSE trading day.
 No external pings — all data comes from HF Dataset only.

+Actual dataset columns (confirmed from parquet inspection):
   ETFs  : AGG, GLD, SLV, SPY, TBT, TLT, VNQ
   Macro : VIX, DXY, T10Y2Y, TBILL_3M, IG_SPREAD, HY_SPREAD
 """

 from huggingface_hub import hf_hub_download
 from datetime import datetime, timedelta
 import pytz

 try:
     import pandas_market_calendars as mcal

 DATASET_REPO = "P2SAMAPA/fi-etf-macro-signal-master-data"
 PARQUET_FILE = "master_data.parquet"

+TARGET_ETF_COLS = ["TLT", "TBT", "VNQ", "SLV", "GLD"]
+BENCHMARK_COLS  = ["SPY", "AGG"]
+TBILL_COL       = "TBILL_3M"
+MACRO_COLS      = ["VIX", "DXY", "T10Y2Y", "IG_SPREAD", "HY_SPREAD"]


+# ── NYSE calendar ─────────────────────────────────────────────────────────────

 def get_last_nyse_trading_day(as_of=None):
     est = pytz.timezone("US/Eastern")
     if as_of is None:
         as_of = datetime.now(est)
     today = as_of.date()
     if NYSE_CAL_AVAILABLE:
         try:
             nyse = mcal.get_calendar("NYSE")
+            sched = nyse.schedule(start_date=today - timedelta(days=10), end_date=today)
             if len(sched) > 0:
                 return sched.index[-1].date()
         except Exception:
             pass
     candidate = today
     while candidate.weekday() >= 5:
         candidate -= timedelta(days=1)

 @st.cache_data(ttl=3600, show_spinner=False)
 def load_dataset(hf_token: str) -> pd.DataFrame:
     try:
         path = hf_hub_download(
             repo_id=DATASET_REPO,
         )
         df = pd.read_parquet(path)

         if not isinstance(df.index, pd.DatetimeIndex):
             for col in ["Date", "date", "DATE"]:
                 if col in df.columns:
                     break
             df.index = pd.to_datetime(df.index)

+        return df.sort_index()

     except Exception as e:
+        st.error(f"❌ Failed to load dataset: {e}")
         return pd.DataFrame()


 # ── Freshness check ───────────────────────────────────────────────────────────

 def check_data_freshness(df: pd.DataFrame) -> dict:
     if df.empty:
+        return {"fresh": False, "message": "Dataset is empty."}

+    last   = df.index[-1].date()
+    expect = get_last_nyse_trading_day()
+    fresh  = last >= expect
+
+    msg = (
+        f"✅ Dataset up to date through **{last}**." if fresh else
+        f"⚠️ **{expect}** data not yet updated. Latest: **{last}**. "
+        f"Dataset updates daily after market close."
+    )
+    return {"fresh": fresh, "last_date_in_data": last,
+            "expected_date": expect, "message": msg}
+
+
+# ── Detect whether a column holds prices or returns ───────────────────────────
+
+def _is_price_series(series: pd.Series) -> bool:
+    """
+    Heuristic: a price series has abs(median) > 2 and std/mean < 0.5.
+    A return series has abs(median) < 0.1 and many values near zero.
+    """
+    clean = series.dropna()
+    if len(clean) == 0:
+        return False
+    med = abs(clean.median())
+    # Strong price signal: median > 2 (e.g. TLT ~ 90, TBT ~ 20)
+    if med > 2:
+        return True
+    # Strong return signal: most values between -0.2 and 0.2
+    if (clean.abs() < 0.2).mean() > 0.9:
+        return False
+    return med > 0.5

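To make the thresholds above concrete, here is a self-contained run of the same heuristic on synthetic data (the two series are invented for illustration — a TLT-like price path and its daily returns):

```python
import numpy as np
import pandas as pd

def _is_price_series(series: pd.Series) -> bool:
    # Copied from the loader above: classify a column as prices vs returns.
    clean = series.dropna()
    if len(clean) == 0:
        return False
    med = abs(clean.median())
    if med > 2:                              # price-like levels, e.g. TLT ~ 90
        return True
    if (clean.abs() < 0.2).mean() > 0.9:     # mostly small values → returns
        return False
    return med > 0.5

rng = np.random.default_rng(0)
prices  = pd.Series(90 * np.exp(np.cumsum(rng.normal(0, 0.01, 500))))
returns = prices.pct_change().dropna()

print(_is_price_series(prices))   # True  → loader will call pct_change()
print(_is_price_series(returns))  # False → loader uses the column as-is
```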
 # ── Feature / target extraction ───────────────────────────────────────────────

 def get_features_and_targets(df: pd.DataFrame):
     """
+    Build return columns for target ETFs and benchmarks.
+    Auto-detects whether source columns are prices or already returns.

     Returns:
+        input_features : list[str]
+        target_etfs    : list[str]  e.g. ["TLT_Ret", ...]
+        tbill_rate     : float
+        df             : DataFrame with _Ret columns added
+        col_info       : dict of diagnostics for sidebar display
     """
     missing = [c for c in TARGET_ETF_COLS if c not in df.columns]
     if missing:
         raise ValueError(
             f"Found in dataset: {list(df.columns)}"
         )

+    col_info = {}

+    # ── Build _Ret columns ────────────────────────────────────────────────────
+    def make_ret(col):
+        ret_col = f"{col}_Ret"
+        if ret_col in df.columns:
+            col_info[col] = "pre-computed _Ret"
+            return ret_col
+        if _is_price_series(df[col]):
+            df[ret_col] = df[col].pct_change()
+            col_info[col] = f"price→pct_change (median={df[col].median():.2f})"
+        else:
+            df[ret_col] = df[col]
+            col_info[col] = f"used as-is (median={df[col].median():.4f})"
+        return ret_col
+
+    target_etfs    = [make_ret(c) for c in TARGET_ETF_COLS]
+    benchmark_rets = [make_ret(c) for c in BENCHMARK_COLS if c in df.columns]
+
+    # Drop NaN rows (first row from pct_change)
+    df = df.dropna(subset=target_etfs).copy()
+
+    # Sanity check: target returns should be small daily values
+    for ret_col in target_etfs:
+        med = df[ret_col].abs().median()
+        if med > 0.1:
+            st.warning(
+                f"⚠️ {ret_col} has median absolute value {med:.4f} — "
+                f"these may not be daily returns. Check dataset column '{ret_col.replace('_Ret','')}'. "
+                f"Sample values: {df[ret_col].tail(3).values}"
+            )

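A toy illustration of the conversion `make_ret` performs when a column is classified as prices (the close values below are invented):

```python
import pandas as pd

# Invented TLT closes; make_ret sees median ≈ 100 (> 2), classifies the
# column as prices, and adds TLT_Ret = pct_change().
df = pd.DataFrame({"TLT": [100.0, 101.0, 99.99, 100.49]})
df["TLT_Ret"] = df["TLT"].pct_change()
print(df)
#       TLT   TLT_Ret
# 0  100.00       NaN   ← first row dropped later by dropna(subset=target_etfs)
# 1  101.00  0.010000
# 2   99.99 -0.010000
# 3  100.49  0.005001
```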
     # ── Input features ────────────────────────────────────────────────────────
     exclude = set(
+        TARGET_ETF_COLS + BENCHMARK_COLS + target_etfs + benchmark_rets +
+        [f"{c}_Ret" for c in BENCHMARK_COLS] + [TBILL_COL]
     )

+    # First try known macro columns
+    input_features = [c for c in MACRO_COLS if c in df.columns and c not in exclude]
+
+    # Then add any engineered signal columns
+    extra = [
         c for c in df.columns
         if c not in exclude
+        and c not in input_features
+        and any(k in c for k in ["_Z", "_Vol", "Regime", "YC_", "Credit_",
+                                 "Rates_", "VIX_", "Spread", "DXY", "T10Y",
+                                 "TBILL", "SOFR", "MOVE"])
+        and pd.api.types.is_numeric_dtype(df[c])
     ]
+    input_features += extra

+    # Fallback: all numeric non-excluded columns
     if not input_features:
         input_features = [
             c for c in df.columns
+            if c not in exclude and pd.api.types.is_numeric_dtype(df[c])
         ]

     # ── T-bill rate ───────────────────────────────────────────────────────────
+    tbill_rate = 0.045
     if TBILL_COL in df.columns:
         raw = df[TBILL_COL].dropna()
         if len(raw) > 0:
+            v = float(raw.iloc[-1])
+            tbill_rate = v / 100 if v > 1 else v

+    return input_features, target_etfs, tbill_rate, df, col_info


 # ── Dataset summary ───────────────────────────────────────────────────────────

         "columns": len(df.columns),
         "start_date": df.index[0].strftime("%Y-%m-%d"),
         "end_date": df.index[-1].strftime("%Y-%m-%d"),
+        "etfs_found": [c for c in TARGET_ETF_COLS if c in df.columns],
+        "benchmarks": [c for c in BENCHMARK_COLS if c in df.columns],
+        "macro_found": [c for c in MACRO_COLS if c in df.columns],
         "tbill_found": TBILL_COL in df.columns,
+        "all_cols": list(df.columns),
     }
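One detail worth calling out in the loader above: the T-bill normalisation `v / 100 if v > 1 else v` treats any value above 1 as a percent quote. A tiny standalone check of that rule:

```python
def normalise_tbill(v: float) -> float:
    # Same rule as the loader: values quoted in percent (e.g. 5.25) are
    # scaled to decimals; values already in decimals pass through.
    return v / 100 if v > 1 else v

print(normalise_tbill(5.25))    # 0.0525  (percent quote, as in FRED-style series)
print(normalise_tbill(0.0525))  # 0.0525  (already a decimal rate)
```

Note a limitation of this heuristic: a rate quoted as 0.5 (meaning 0.5%) is below the threshold and would be left as 0.5, i.e. read as 50% — a corner case in near-zero-rate regimes.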
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py CHANGED
@@ -10,9 +10,11 @@ import pandas as pd
 import numpy as np

 # ── Module imports ────────────────────────────────────────────────────────────
-from data.loader import load_dataset, check_data_freshness, get_features_and_targets, dataset_summary
 from utils.calendar import get_est_time, is_sync_window, get_next_signal_date
-from models.base import build_sequences, train_val_test_split, scale_features, returns_to_labels
 from models.approach1_wavelet import train_approach1, predict_approach1
 from models.approach2_regime import train_approach2, predict_approach2
 from models.approach3_multiscale import train_approach3, predict_approach3
@@ -47,10 +49,10 @@ with st.sidebar:

     st.divider()

-    start_yr = st.slider("📅 Start Year", 2010, 2024, 2016)
-    fee_bps = st.slider("💰 Fee (bps)", 0, 50, 10)
-    lookback = st.slider("📐 Lookback (days)", 20, 60, 30, step=5)
-    epochs = st.number_input("🔁 Max Epochs", 20, 300, 100, step=10)

     st.divider()

@@ -58,8 +60,10 @@ with st.sidebar:
     split_map = {"70/15/15": (0.70, 0.15), "80/10/10": (0.80, 0.10)}
     train_pct, val_pct = split_map[split_option]

-    include_cash = st.checkbox("💵 Include CASH class", value=True,
-                               help="Model can select CASH (earns T-bill rate) as an alternative to any ETF")

     st.divider()

@@ -70,90 +74,102 @@ st.title("🧠 P2-ETF-CNN-LSTM")
 st.caption("Approach 1: Wavelet · Approach 2: Regime-Conditioned · Approach 3: Multi-Scale Parallel")
 st.caption("Winner selected by highest raw annualised return on out-of-sample test set.")

-# ── Load data (always, to check freshness) ────────────────────────────────────
 if not HF_TOKEN:
-    st.error("❌ HF_TOKEN secret not found. Please add it to your HF Space / GitHub secrets.")
     st.stop()

 with st.spinner("📡 Loading dataset from HuggingFace..."):
-    df = load_dataset(HF_TOKEN)

-if df.empty:
     st.stop()

 # ── Freshness check ───────────────────────────────────────────────────────────
-freshness = check_data_freshness(df)
 show_freshness_status(freshness)

 # ── Dataset summary in sidebar ────────────────────────────────────────────────
 with st.sidebar:
     st.divider()
     st.subheader("📦 Dataset Info")
-    summary = dataset_summary(df)
     if summary:
         st.write(f"**Rows:** {summary['rows']:,}")
         st.write(f"**Range:** {summary['start_date']} → {summary['end_date']}")
-        st.write(f"**ETFs:** {', '.join([e.replace('_Ret','') for e in summary['etfs_found']])}")
-        st.write(f"**Benchmarks:** {', '.join([b.replace('_Ret','') for b in summary['benchmarks']])}")
         st.write(f"**T-bill col:** {'✅' if summary['tbill_found'] else '❌'}")

-# ── Main execution ────────────────────────────────────────────────────────────
 if not run_button:
-    st.info("👈 Configure parameters in the sidebar and click **🚀 Run All 3 Approaches** to begin.")
     st.stop()

 # ── Filter by start year ──────────────────────────────────────────────────────
-df = df[df.index.year >= start_yr].copy()
-st.write(f"📅 **Data:** {df.index[0].strftime('%Y-%m-%d')} → {df.index[-1].strftime('%Y-%m-%d')} "
-         f"({df.index[-1].year - df.index[0].year + 1} years)")

-# ── Feature / target extraction ───────────────────────────────────────────────
 try:
-    input_features, target_etfs, tbill_rate = get_features_and_targets(df)
 except ValueError as e:
     st.error(str(e))
     st.stop()

-st.info(f"🎯 **Targets:** {len(target_etfs)} ETFs · **Features:** {len(input_features)} signals · "
-        f"**T-bill rate:** {tbill_rate*100:.2f}%")

-# ── Prepare sequences ─────────────────────────────────────────────────────────
-X_raw = df[input_features].values.astype(np.float32)
-y_raw = df[target_etfs].values.astype(np.float32)
-n_etfs = len(target_etfs)
-n_classes = n_etfs + (1 if include_cash else 0)  # +1 for CASH

-# Fill NaNs with column means
 col_means = np.nanmean(X_raw, axis=0)
 for j in range(X_raw.shape[1]):
     mask = np.isnan(X_raw[:, j])
-    X_raw[mask, j] = col_means[j]

 X_seq, y_seq = build_sequences(X_raw, y_raw, lookback)
 y_labels = returns_to_labels(y_seq, include_cash=include_cash)

-X_train, y_train_r, X_val, y_val_r, X_test, y_test_r = train_val_test_split(X_seq, y_seq, train_pct, val_pct)
-_, y_train_l, _, y_val_l, _, y_test_l = train_val_test_split(X_seq, y_labels, train_pct, val_pct)

 X_train_s, X_val_s, X_test_s, _ = scale_features(X_train, X_val, X_test)

 train_size = len(X_train)
 val_size = len(X_val)

-# Test dates (aligned with y_test)
-test_start = lookback + train_size + val_size
-test_dates = df.index[test_start: test_start + len(X_test)]
-test_slice = slice(test_start, test_start + len(X_test))

-st.success(f"✅ Sequences — Train: {train_size} · Val: {val_size} · Test: {len(X_test)}")

 # ── Train all three approaches ────────────────────────────────────────────────
 results = {}
-trained_info = {}  # store extra info needed for conviction

 progress = st.progress(0, text="Starting training...")

-# ── Approach 1: Wavelet ───────────────────────────────────────────────────────
 with st.spinner("🌊 Training Approach 1 — Wavelet CNN-LSTM..."):
     try:
         model1, hist1, _ = train_approach1(
@@ -163,7 +179,8 @@ with st.spinner("🌊 Training Approach 1 — Wavelet CNN-LSTM..."):
         )
         preds1, proba1 = predict_approach1(model1, X_test_s)
         results["Approach 1"] = execute_strategy(
-            preds1, proba1, y_test_r, test_dates, target_etfs, fee_bps, tbill_rate, include_cash,
         )
         trained_info["Approach 1"] = {"proba": proba1}
         st.success("✅ Approach 1 complete")
@@ -173,7 +190,7 @@ with st.spinner("🌊 Training Approach 1 — Wavelet CNN-LSTM..."):

 progress.progress(33, text="Approach 1 done...")

-# ── Approach 2: Regime-Conditioned ───────────────────────────────────────────
 with st.spinner("🔀 Training Approach 2 — Regime-Conditioned CNN-LSTM..."):
     try:
         model2, hist2, hmm2, regime_cols2 = train_approach2(
@@ -191,7 +208,8 @@ with st.spinner("🔀 Training Approach 2 — Regime-Conditioned CNN-LSTM..."):
             lookback, train_size, val_size,
         )
         results["Approach 2"] = execute_strategy(
-            preds2, proba2, y_test_r, test_dates, target_etfs, fee_bps, tbill_rate, include_cash,
         )
         trained_info["Approach 2"] = {"proba": proba2}
         st.success("✅ Approach 2 complete")
@@ -201,7 +219,7 @@ with st.spinner("🔀 Training Approach 2 — Regime-Conditioned CNN-LSTM..."):

 progress.progress(66, text="Approach 2 done...")

-# ── Approach 3: Multi-Scale ───────────────────────────────────────────────────
 with st.spinner("📡 Training Approach 3 — Multi-Scale CNN-LSTM..."):
     try:
         model3, hist3 = train_approach3(
@@ -211,7 +229,8 @@ with st.spinner("📡 Training Approach 3 — Multi-Scale CNN-LSTM..."):
         )
         preds3, proba3 = predict_approach3(model3, X_test_s)
         results["Approach 3"] = execute_strategy(
-            preds3, proba3, y_test_r, test_dates, target_etfs, fee_bps, tbill_rate, include_cash,
         )
         trained_info["Approach 3"] = {"proba": proba3}
         st.success("✅ Approach 3 complete")
@@ -227,15 +246,14 @@ winner_name = select_winner(results)
 winner_res = results.get(winner_name)

 if winner_res is None:
-    st.error("❌ All approaches failed. Please check your data and configuration.")
     st.stop()

-# ── Next trading date ─────────────────────────────────────────────────────────
 next_date = get_next_signal_date()

 st.divider()

-# ── Signal banner (winner) ────────────────────────────────────────────────────
 show_signal_banner(winner_res["next_signal"], next_date, winner_name)

 # ── Conviction panel ──────────────────────────────────────────────────────────
@@ -256,7 +274,6 @@ st.subheader("🏆 Approach Comparison (Winner = Highest Raw Annualised Return)"
 comparison_df = build_comparison_table(results, winner_name)
 show_comparison_table(comparison_df)

-# ── Comparison bar chart ──────────────────────────────────────────────────────
 st.plotly_chart(comparison_bar_chart(results, winner_name), use_container_width=True)

 st.divider()
@@ -268,6 +285,6 @@ st.plotly_chart(fig, use_container_width=True)

 st.divider()

-# ── Audit trail (winner) ──────────────────────────────────────────────────────
 st.subheader(f"📋 Audit Trail — {winner_name} (Last 20 Trading Days)")
 show_audit_trail(winner_res["audit_trail"])
 import numpy as np

 # ── Module imports ────────────────────────────────────────────────────────────
+from data.loader import (load_dataset, check_data_freshness,
+                         get_features_and_targets, dataset_summary)
 from utils.calendar import get_est_time, is_sync_window, get_next_signal_date
+from models.base import (build_sequences, train_val_test_split,
+                         scale_features, returns_to_labels)
 from models.approach1_wavelet import train_approach1, predict_approach1
 from models.approach2_regime import train_approach2, predict_approach2
 from models.approach3_multiscale import train_approach3, predict_approach3

     st.divider()

+    start_yr = st.slider("📅 Start Year", 2010, 2024, 2016)
+    fee_bps = st.slider("💰 Fee (bps)", 0, 50, 10)
+    lookback = st.slider("📐 Lookback (days)", 20, 60, 30, step=5)
+    epochs = st.number_input("🔁 Max Epochs", 20, 300, 100, step=10)

     st.divider()

     split_map = {"70/15/15": (0.70, 0.15), "80/10/10": (0.80, 0.10)}
     train_pct, val_pct = split_map[split_option]

+    include_cash = st.checkbox(
+        "💵 Include CASH class", value=True,
+        help="Model can select CASH (earns T-bill rate) instead of any ETF",
+    )

     st.divider()

 st.caption("Approach 1: Wavelet · Approach 2: Regime-Conditioned · Approach 3: Multi-Scale Parallel")
 st.caption("Winner selected by highest raw annualised return on out-of-sample test set.")

+# ── Token check ───────────────────────────────────────────────────────────────
 if not HF_TOKEN:
+    st.error("❌ HF_TOKEN secret not found. Add it to HF Space / GitHub secrets.")
     st.stop()

+# ── Load dataset ──────────────────────────────────────────────────────────────
 with st.spinner("📡 Loading dataset from HuggingFace..."):
+    df_raw = load_dataset(HF_TOKEN)

+if df_raw.empty:
     st.stop()

 # ── Freshness check ───────────────────────────────────────────────────────────
+freshness = check_data_freshness(df_raw)
 show_freshness_status(freshness)

 # ── Dataset summary in sidebar ────────────────────────────────────────────────
 with st.sidebar:
     st.divider()
     st.subheader("📦 Dataset Info")
+    summary = dataset_summary(df_raw)
     if summary:
         st.write(f"**Rows:** {summary['rows']:,}")
         st.write(f"**Range:** {summary['start_date']} → {summary['end_date']}")
+        st.write(f"**ETFs:** {', '.join(summary['etfs_found'])}")
+        st.write(f"**Benchmarks:** {', '.join(summary['benchmarks'])}")
+        st.write(f"**Macro:** {', '.join(summary['macro_found'])}")
         st.write(f"**T-bill col:** {'✅' if summary['tbill_found'] else '❌'}")

+# ── Wait for run button ───────────────────────────────────────────────────────
 if not run_button:
+    st.info("👈 Configure parameters in the sidebar and click **🚀 Run All 3 Approaches**.")
     st.stop()

 # ── Filter by start year ──────────────────────────────────────────────────────
+df = df_raw[df_raw.index.year >= start_yr].copy()
+st.write(
+    f"📅 **Data:** {df.index[0].strftime('%Y-%m-%d')} → {df.index[-1].strftime('%Y-%m-%d')} "
+    f"({df.index[-1].year - df.index[0].year + 1} years)"
+)

+# ── Features & targets ────────────────────────────────────────────────────────
 try:
+    input_features, target_etfs, tbill_rate, df = get_features_and_targets(df)
 except ValueError as e:
     st.error(str(e))
     st.stop()

+n_etfs = len(target_etfs)
+n_classes = n_etfs + (1 if include_cash else 0)
+
+st.info(
+    f"🎯 **Targets:** {', '.join([t.replace('_Ret','') for t in target_etfs])} · "
+    f"**Features:** {len(input_features)} signals · "
+    f"**T-bill:** {tbill_rate*100:.2f}%"
+)

+# ── Build sequences ───────────────────────────────────────────────────────────
+X_raw = df[input_features].values.astype(np.float32)
+y_raw = df[target_etfs].values.astype(np.float32)

+# Fill any remaining NaNs with column means
 col_means = np.nanmean(X_raw, axis=0)
 for j in range(X_raw.shape[1]):
     mask = np.isnan(X_raw[:, j])
+    if mask.any():
+        X_raw[mask, j] = col_means[j]

 X_seq, y_seq = build_sequences(X_raw, y_raw, lookback)
 y_labels = returns_to_labels(y_seq, include_cash=include_cash)

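`build_sequences` is imported from `models/base.py`, which is not part of this sync. The sketch below is a plausible reading of its windowing, inferred from the date arithmetic used later (`test_start = lookback + train_size + val_size`) — an assumption, not the confirmed implementation:

```python
import numpy as np

def build_sequences(X: np.ndarray, y: np.ndarray, lookback: int):
    # Assumed behaviour: pair each lookback-day feature window with the
    # return vector of the day that follows the window.
    X_seq = np.stack([X[t - lookback:t] for t in range(lookback, len(X))])
    y_seq = y[lookback:]
    return X_seq, y_seq

X = np.random.rand(100, 6).astype(np.float32)   # 6 macro features
y = np.random.rand(100, 5).astype(np.float32)   # 5 ETF returns
X_seq, y_seq = build_sequences(X, y, 30)
print(X_seq.shape, y_seq.shape)                 # (70, 30, 6) (70, 5)
```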
+(X_train, y_train_r, X_val, y_val_r,
+ X_test, y_test_r) = train_val_test_split(X_seq, y_seq, train_pct, val_pct)
+(_, y_train_l, _, y_val_l,
+ _, y_test_l) = train_val_test_split(X_seq, y_labels, train_pct, val_pct)

 X_train_s, X_val_s, X_test_s, _ = scale_features(X_train, X_val, X_test)

 train_size = len(X_train)
 val_size = len(X_val)

+test_start = lookback + train_size + val_size
+test_dates = df.index[test_start: test_start + len(X_test)]
+test_slice = slice(test_start, test_start + len(X_test))

+st.success(
+    f"✅ Sequences — Train: {train_size:,} · Val: {val_size:,} · Test: {len(X_test):,}"
+)

 # ── Train all three approaches ────────────────────────────────────────────────
 results = {}
+trained_info = {}

 progress = st.progress(0, text="Starting training...")

+# ── Approach 1 ────────────────────────────────────────────────────────────────
 with st.spinner("🌊 Training Approach 1 — Wavelet CNN-LSTM..."):
     try:
         model1, hist1, _ = train_approach1(

         )
         preds1, proba1 = predict_approach1(model1, X_test_s)
         results["Approach 1"] = execute_strategy(
+            preds1, proba1, y_test_r, test_dates,
+            target_etfs, fee_bps, tbill_rate, include_cash,
         )
         trained_info["Approach 1"] = {"proba": proba1}
         st.success("✅ Approach 1 complete")

 progress.progress(33, text="Approach 1 done...")

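The strategy module behind `execute_strategy` is not shown in this diff; a plausible reading of its inputs (`fee_bps`, `tbill_rate`, `include_cash`) is sketched below. Names and logic are illustrative only — in particular the bps-on-switch fee model and the daily T-bill accrual are assumptions:

```python
import numpy as np

def execute_strategy(preds, y_test_r, fee_bps, tbill_rate, include_cash=True):
    # Assumed semantics: each day hold the predicted class (an ETF column of
    # y_test_r, or CASH earning the daily T-bill rate) and charge fee_bps
    # whenever the position changes.
    n_etfs = y_test_r.shape[1]
    daily_tbill = tbill_rate / 252
    fee = fee_bps / 10_000
    rets = []
    for t, p in enumerate(preds):
        r = daily_tbill if (include_cash and p == n_etfs) else y_test_r[t, p]
        if t > 0 and preds[t] != preds[t - 1]:
            r -= fee                      # switching cost on rotation days
        rets.append(r)
    return np.array(rets)

rets = execute_strategy(np.array([0, 0, 5]), np.random.randn(3, 5) * 0.01,
                        fee_bps=10, tbill_rate=0.045)  # day 3 → CASH, minus fee
```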
+# ── Approach 2 ────────────────────────────────────────────────────────────────
 with st.spinner("🔀 Training Approach 2 — Regime-Conditioned CNN-LSTM..."):
     try:
         model2, hist2, hmm2, regime_cols2 = train_approach2(

             lookback, train_size, val_size,
         )
         results["Approach 2"] = execute_strategy(
+            preds2, proba2, y_test_r, test_dates,
+            target_etfs, fee_bps, tbill_rate, include_cash,
         )
         trained_info["Approach 2"] = {"proba": proba2}
         st.success("✅ Approach 2 complete")

 progress.progress(66, text="Approach 2 done...")

+# ── Approach 3 ────────────────────────────────────────────────────────────────
 with st.spinner("📡 Training Approach 3 — Multi-Scale CNN-LSTM..."):
     try:
         model3, hist3 = train_approach3(

         )
         preds3, proba3 = predict_approach3(model3, X_test_s)
         results["Approach 3"] = execute_strategy(
+            preds3, proba3, y_test_r, test_dates,
+            target_etfs, fee_bps, tbill_rate, include_cash,
         )
         trained_info["Approach 3"] = {"proba": proba3}
         st.success("✅ Approach 3 complete")

 winner_res = results.get(winner_name)

 if winner_res is None:
+    st.error("❌ All approaches failed. Please check data and configuration.")
     st.stop()

 next_date = get_next_signal_date()

 st.divider()

+# ── Signal banner ─────────────────────────────────────────────────────────────
 show_signal_banner(winner_res["next_signal"], next_date, winner_name)

 # ── Conviction panel ──────────────────────────────────────────────────────────

 comparison_df = build_comparison_table(results, winner_name)
 show_comparison_table(comparison_df)

 st.plotly_chart(comparison_bar_chart(results, winner_name), use_container_width=True)

 st.divider()

 st.divider()

+# ── Audit trail ───────────────────────────────────────────────────────────────
 st.subheader(f"📋 Audit Trail — {winner_name} (Last 20 Trading Days)")
 show_audit_trail(winner_res["audit_trail"])
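For completeness, `select_winner` is also not in this diff; given the app's caption ("Winner selected by highest raw annualised return on out-of-sample test set"), its rule is presumably a one-liner along these lines — the `"annualised_return"` key is a guessed name:

```python
def select_winner(results: dict) -> str:
    # Assumed rule, per the app caption: pick the approach whose result dict
    # reports the highest raw annualised return on the test set.
    return max(results, key=lambda k: results[k]["annualised_return"])
```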
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/data/loader.py CHANGED
@@ -3,6 +3,10 @@ data/loader.py
 Loads master_data.parquet from HF Dataset.
 Validates freshness against the last NYSE trading day.
 No external pings — all data comes from HF Dataset only.
 """

 import pandas as pd
@@ -22,31 +26,29 @@ except ImportError:
 DATASET_REPO = "P2SAMAPA/fi-etf-macro-signal-master-data"
 PARQUET_FILE = "master_data.parquet"

-# Columns expected in the dataset
-REQUIRED_ETF_COLS = ["TLT_Ret", "TBT_Ret", "VNQ_Ret", "SLV_Ret", "GLD_Ret"]
-BENCHMARK_COLS = ["SPY_Ret", "AGG_Ret"]
-TBILL_COL = "DTB3"  # 3m T-bill column in HF dataset
-TARGET_ETFS = REQUIRED_ETF_COLS  # 5 targets (no CASH in returns, CASH handled in strategy)

 # ── NYSE calendar helpers ─────────────────────────────────────────────────────

-def get_last_nyse_trading_day(as_of: datetime = None) -> datetime.date:
-    """Return the most recent NYSE trading day before or on as_of (default: today EST)."""
     est = pytz.timezone("US/Eastern")
     if as_of is None:
         as_of = datetime.now(est)
-
     today = as_of.date()

     if NYSE_CAL_AVAILABLE:
         try:
-            nyse = mcal.get_calendar("NYSE")
-            # Look back up to 10 days to find last trading day
             start = today - timedelta(days=10)
-            schedule = nyse.schedule(start_date=start, end_date=today)
-            if len(schedule) > 0:
-                return schedule.index[-1].date()
         except Exception:
             pass

@@ -57,18 +59,6 @@ def get_last_nyse_trading_day(as_of: datetime = None) -> datetime.date:
     return candidate

-def is_nyse_trading_day(date) -> bool:
-    """Return True if date is a NYSE trading day."""
-    if NYSE_CAL_AVAILABLE:
-        try:
-            nyse = mcal.get_calendar("NYSE")
-            schedule = nyse.schedule(start_date=date, end_date=date)
-            return len(schedule) > 0
-        except Exception:
-            pass
-    return date.weekday() < 5
-
-
 # ── Data loading ──────────────────────────────────────────────────────────────

 @st.cache_data(ttl=3600, show_spinner=False)
@@ -88,10 +78,10 @@ def load_dataset(hf_token: str) -> pd.DataFrame:

     # Ensure DatetimeIndex
     if not isinstance(df.index, pd.DatetimeIndex):
-        if "Date" in df.columns:
-            df = df.set_index("Date")
-        elif "date" in df.columns:
-            df = df.set_index("date")
         df.index = pd.to_datetime(df.index)

     df = df.sort_index()
@@ -107,14 +97,6 @@ def load_dataset(hf_token: str) -> pd.DataFrame:
 def check_data_freshness(df: pd.DataFrame) -> dict:
     """
     Check whether the dataset contains data for the last NYSE trading day.
-
-    Returns a dict:
-        {
-            "fresh": bool,
-            "last_date_in_data": date,
-            "expected_date": date,
-            "message": str
-        }
     """
     if df.empty:
         return {
@@ -126,8 +108,7 @@ def check_data_freshness(df: pd.DataFrame) -> dict:

     last_date_in_data = df.index[-1].date()
     expected_date = get_last_nyse_trading_day()
-
-    fresh = last_date_in_data >= expected_date

     if fresh:
         message = f"✅ Dataset is up to date through **{last_date_in_data}**."
@@ -150,66 +131,105 @@ def check_data_freshness(df: pd.DataFrame) -> dict:

 def get_features_and_targets(df: pd.DataFrame):
     """
-    Extract input feature columns and target ETF return columns from the dataset.

     Returns:
-        input_features : list of column names
-        target_etfs    : list of ETF return column names (e.g. TLT_Ret)
-        tbill_rate     : latest 3m T-bill rate as a float (annualised, e.g. 0.045)
     """
-    # Target ETF return columns
-    target_etfs = [c for c in REQUIRED_ETF_COLS if c in df.columns]

-    if not target_etfs:
         raise ValueError(
-            f"No target ETF columns found. Expected: {REQUIRED_ETF_COLS}. "
             f"Found in dataset: {list(df.columns)}"
         )

-    # Input features: Z-scores, vol, regime, yield curve, credit, rates, VIX terms
-    exclude = set(target_etfs + BENCHMARK_COLS + [TBILL_COL])
     input_features = [
         c for c in df.columns
         if c not in exclude
-        and (
-            c.endswith("_Z")
-            or c.endswith("_Vol")
-            or "Regime" in c
-            or "YC_" in c
-            or "Credit_" in c
-            or "Rates_" in c
-            or "VIX_" in c
-            or "Spread" in c
-            or "DXY" in c
-            or "VIX" in c
-            or "T10Y" in c
-        )
     ]

-    # 3m T-bill rate (for CASH return & Sharpe)
-    tbill_rate = 0.045  # default fallback
     if TBILL_COL in df.columns:
         raw = df[TBILL_COL].dropna()
         if len(raw) > 0:
-            last_val = raw.iloc[-1]
-            # DTB3 is typically in percent (e.g. 5.25 means 5.25%)
-            tbill_rate = float(last_val) / 100 if last_val > 1 else float(last_val)

-    return input_features, target_etfs, tbill_rate


-# ── Column info helper (for sidebar display) ──────────────────────────────────

 def dataset_summary(df: pd.DataFrame) -> dict:
-    """Return a brief summary dict for sidebar display."""
     if df.empty:
         return {}
     return {
-        "rows": len(df),
-        "columns": len(df.columns),
-        "start_date": df.index[0].strftime("%Y-%m-%d"),
-        "end_date": df.index[-1].strftime("%Y-%m-%d"),
-        "etfs_found": [c for c in REQUIRED_ETF_COLS if c in df.columns],
-        "benchmarks": [c for c in BENCHMARK_COLS if c in df.columns],
         "tbill_found": TBILL_COL in df.columns,
     }
 Loads master_data.parquet from HF Dataset.
 Validates freshness against the last NYSE trading day.
 No external pings — all data comes from HF Dataset only.
+
+Actual dataset columns (from parquet inspection):
+  ETFs  : AGG, GLD, SLV, SPY, TBT, TLT, VNQ
+  Macro : VIX, DXY, T10Y2Y, TBILL_3M, IG_SPREAD, HY_SPREAD
 """

 import pandas as pd

 DATASET_REPO = "P2SAMAPA/fi-etf-macro-signal-master-data"
 PARQUET_FILE = "master_data.parquet"

+# ── Actual column names in the dataset ───────────────────────────────────────
+TARGET_ETF_COLS = ["TLT", "TBT", "VNQ", "SLV", "GLD"]   # traded ETFs
+BENCHMARK_COLS  = ["SPY", "AGG"]                        # chart only
+TBILL_COL       = "TBILL_3M"                            # 3m T-bill rate
+MACRO_COLS      = ["VIX", "DXY", "T10Y2Y", "IG_SPREAD", "HY_SPREAD"]


 # ── NYSE calendar helpers ─────────────────────────────────────────────────────

+def get_last_nyse_trading_day(as_of=None):
+    """Return the most recent NYSE trading day on or before as_of (default: today EST)."""
     est = pytz.timezone("US/Eastern")
     if as_of is None:
         as_of = datetime.now(est)
     today = as_of.date()

     if NYSE_CAL_AVAILABLE:
         try:
+            nyse = mcal.get_calendar("NYSE")
             start = today - timedelta(days=10)
+            sched = nyse.schedule(start_date=start, end_date=today)
+            if len(sched) > 0:
+                return sched.index[-1].date()
         except Exception:
             pass

     return candidate


 # ── Data loading ──────────────────────────────────────────────────────────────

 @st.cache_data(ttl=3600, show_spinner=False)

     # Ensure DatetimeIndex
     if not isinstance(df.index, pd.DatetimeIndex):
+        for col in ["Date", "date", "DATE"]:
+            if col in df.columns:
+                df = df.set_index(col)
+                break
         df.index = pd.to_datetime(df.index)

     df = df.sort_index()

 def check_data_freshness(df: pd.DataFrame) -> dict:
     """
     Check whether the dataset contains data for the last NYSE trading day.
     """
     if df.empty:
         return {

     last_date_in_data = df.index[-1].date()
     expected_date = get_last_nyse_trading_day()
+    fresh = last_date_in_data >= expected_date

     if fresh:
         message = f"✅ Dataset is up to date through **{last_date_in_data}**."

 def get_features_and_targets(df: pd.DataFrame):
     """
+    Extract input feature columns and target ETF return columns.
+
+    The dataset stores raw price or return values directly under ticker names.
+    We compute daily log returns for target ETFs if they are not already returns.

     Returns:
+        input_features : list of column names to use as model inputs
+        target_etfs    : list of ETF column names (after return computation)
+        tbill_rate     : latest 3m T-bill rate as float (annualised, e.g. 0.045)
+        df             : DataFrame (possibly with new _Ret columns added)
     """

+    # ── Confirm target ETFs exist ─────────────────────────────────────────────
+    missing = [c for c in TARGET_ETF_COLS if c not in df.columns]
+    if missing:
         raise ValueError(
+            f"Missing ETF columns: {missing}. "
             f"Found in dataset: {list(df.columns)}"
         )

+    # ── Build return columns ──────────────────────────────────────────────────
+    # If values look like prices (>5), compute pct returns.
+    # If they already look like small returns (<1 in abs), use as-is.
+    target_etfs = []
+    for col in TARGET_ETF_COLS:
+        ret_col = f"{col}_Ret"
+        if ret_col not in df.columns:
+            sample = df[col].dropna()
+            if len(sample) > 0 and abs(sample.median()) > 1:
+                # Looks like price — compute pct change
+                df[ret_col] = df[col].pct_change()
+            else:
+                # Already returns
+                df[ret_col] = df[col]
+        target_etfs.append(ret_col)
+
+    # Same for benchmarks
+    for col in BENCHMARK_COLS:
+        ret_col = f"{col}_Ret"
+        if ret_col not in df.columns and col in df.columns:
+            sample = df[col].dropna()
+            if len(sample) > 0 and abs(sample.median()) > 1:
+                df[ret_col] = df[col].pct_change()
+            else:
+                df[ret_col] = df[col]
+
+    # Drop rows with NaN in target columns (first row after pct_change)
+    df = df.dropna(subset=target_etfs)
+
+    # ── Input features ────────────────────────────────────────────────────────
+    # Use macro columns directly; exclude ETF price/return cols and benchmarks
+    exclude = set(
+        TARGET_ETF_COLS + BENCHMARK_COLS +
+        target_etfs +
+        [f"{c}_Ret" for c in BENCHMARK_COLS] +
+        [TBILL_COL]
+    )
+
     input_features = [
         c for c in df.columns
         if c not in exclude
+        and c in (MACRO_COLS + [
+            col for col in df.columns
+            if any(k in col for k in ["_Z", "_Vol", "Regime", "YC_", "Credit_",
+                                      "Rates_", "VIX_", "Spread", "DXY", "T10Y"])
+        ])
     ]

+    # Fallback: if none matched, use all non-excluded numeric columns
+    if not input_features:
+        input_features = [
+            c for c in df.columns
+            if c not in exclude
+            and pd.api.types.is_numeric_dtype(df[c])
+        ]
+
+    # ── T-bill rate ───────────────────────────────────────────────────────────
+    tbill_rate = 0.045  # default
     if TBILL_COL in df.columns:
         raw = df[TBILL_COL].dropna()
         if len(raw) > 0:
+            last_val = float(raw.iloc[-1])
+            tbill_rate = last_val / 100 if last_val > 1 else last_val

+    return input_features, target_etfs, tbill_rate, df


+# ── Dataset summary ───────────────────────────────────────────────────────────

 def dataset_summary(df: pd.DataFrame) -> dict:
     if df.empty:
         return {}
     return {
+        "rows": len(df),
+        "columns": len(df.columns),
+        "start_date": df.index[0].strftime("%Y-%m-%d"),
+        "end_date": df.index[-1].strftime("%Y-%m-%d"),
+        "etfs_found": [c for c in TARGET_ETF_COLS if c in df.columns],
+        "benchmarks": [c for c in BENCHMARK_COLS if c in df.columns],
+        "macro_found": [c for c in MACRO_COLS if c in df.columns],
         "tbill_found": TBILL_COL in df.columns,
     }
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/data/data/__init__.py ADDED
@@ -0,0 +1 @@
+
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md CHANGED
@@ -1,3 +1,15 @@
+---
+title: P2-ETF-CNN-LSTM-ALTERNATIVE-APPROACHES
+emoji: 🧠
+colorFrom: green
+colorTo: blue
+sdk: streamlit
+sdk_version: "1.32.0"
+python_version: "3.10"
+app_file: app.py
+pinned: false
+---
+
 # P2-ETF-CNN-LSTM-ALTERNATIVE-APPROACHES

 Macro-driven ETF rotation using three augmented CNN-LSTM variants.
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/data/__init__.py ADDED
@@ -0,0 +1 @@
+
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/data/loader.py ADDED
@@ -0,0 +1,215 @@
+"""
+data/loader.py
+Loads master_data.parquet from HF Dataset.
+Validates freshness against the last NYSE trading day.
+No external pings — all data comes from HF Dataset only.
+"""
+
+import pandas as pd
+import numpy as np
+import streamlit as st
+from huggingface_hub import hf_hub_download
+from datetime import datetime, timedelta
+import pytz
+import os
+
+try:
+    import pandas_market_calendars as mcal
+    NYSE_CAL_AVAILABLE = True
+except ImportError:
+    NYSE_CAL_AVAILABLE = False
+
+DATASET_REPO = "P2SAMAPA/fi-etf-macro-signal-master-data"
+PARQUET_FILE = "master_data.parquet"
+
+# Columns expected in the dataset
+REQUIRED_ETF_COLS = ["TLT_Ret", "TBT_Ret", "VNQ_Ret", "SLV_Ret", "GLD_Ret"]
+BENCHMARK_COLS = ["SPY_Ret", "AGG_Ret"]
+TBILL_COL = "DTB3"  # 3m T-bill column in HF dataset
+TARGET_ETFS = REQUIRED_ETF_COLS  # 5 targets (no CASH in returns, CASH handled in strategy)
+
+
+# ── NYSE calendar helpers ─────────────────────────────────────────────────────
+
+def get_last_nyse_trading_day(as_of: datetime = None) -> datetime.date:
+    """Return the most recent NYSE trading day before or on as_of (default: today EST)."""
+    est = pytz.timezone("US/Eastern")
+    if as_of is None:
+        as_of = datetime.now(est)
+
+    today = as_of.date()
+
+    if NYSE_CAL_AVAILABLE:
+        try:
+            nyse = mcal.get_calendar("NYSE")
+            # Look back up to 10 days to find last trading day
+            start = today - timedelta(days=10)
+            schedule = nyse.schedule(start_date=start, end_date=today)
+            if len(schedule) > 0:
+                return schedule.index[-1].date()
+        except Exception:
+            pass
+
+    # Fallback: skip weekends
+    candidate = today
+    while candidate.weekday() >= 5:
+        candidate -= timedelta(days=1)
+    return candidate
+
+
+def is_nyse_trading_day(date) -> bool:
+    """Return True if date is a NYSE trading day."""
+    if NYSE_CAL_AVAILABLE:
+        try:
+            nyse = mcal.get_calendar("NYSE")
+            schedule = nyse.schedule(start_date=date, end_date=date)
+            return len(schedule) > 0
+        except Exception:
+            pass
+    return date.weekday() < 5
+
+
+# ── Data loading ──────────────────────────────────────────────────────────────
+
+@st.cache_data(ttl=3600, show_spinner=False)
+def load_dataset(hf_token: str) -> pd.DataFrame:
+    """
+    Download master_data.parquet from HF Dataset and return as DataFrame.
+    Cached for 1 hour. Index is parsed as DatetimeIndex.
+    """
+    try:
+        path = hf_hub_download(
+            repo_id=DATASET_REPO,
+            filename=PARQUET_FILE,
+            repo_type="dataset",
+            token=hf_token,
+        )
+        df = pd.read_parquet(path)
+
+        # Ensure DatetimeIndex
+        if not isinstance(df.index, pd.DatetimeIndex):
+            if "Date" in df.columns:
+                df = df.set_index("Date")
+            elif "date" in df.columns:
+                df = df.set_index("date")
+            df.index = pd.to_datetime(df.index)
+
+        df = df.sort_index()
+        return df
+
+    except Exception as e:
+        st.error(f"❌ Failed to load dataset from HuggingFace: {e}")
+        return pd.DataFrame()
+
+
+# ── Freshness check ───────────────────────────────────────────────────────────
+
+def check_data_freshness(df: pd.DataFrame) -> dict:
+    """
+    Check whether the dataset contains data for the last NYSE trading day.
+
+    Returns a dict:
+        {
+            "fresh": bool,
+            "last_date_in_data": date,
+            "expected_date": date,
+            "message": str
+        }
+    """
+    if df.empty:
+        return {
+            "fresh": False,
+            "last_date_in_data": None,
+            "expected_date": None,
+            "message": "Dataset is empty.",
+        }
+
+    last_date_in_data = df.index[-1].date()
+    expected_date = get_last_nyse_trading_day()
+
+    fresh = last_date_in_data >= expected_date
+
+    if fresh:
+        message = f"✅ Dataset is up to date through **{last_date_in_data}**."
+    else:
+        message = (
+            f"⚠️ **{expected_date}** data not yet updated in dataset. "
+            f"Latest available: **{last_date_in_data}**. "
+            f"Please check back later — the dataset updates daily after market close."
+        )
+
+    return {
+        "fresh": fresh,
+        "last_date_in_data": last_date_in_data,
+        "expected_date": expected_date,
+        "message": message,
+    }
+
+
+# ── Feature / target extraction ───────────────────────────────────────────────
+
+def get_features_and_targets(df: pd.DataFrame):
+    """
+    Extract input feature columns and target ETF return columns from the dataset.
+
+    Returns:
+        input_features : list of column names
+        target_etfs    : list of ETF return column names (e.g. TLT_Ret)
+        tbill_rate     : latest 3m T-bill rate as a float (annualised, e.g. 0.045)
+    """
+    # Target ETF return columns
+    target_etfs = [c for c in REQUIRED_ETF_COLS if c in df.columns]
+
+    if not target_etfs:
+        raise ValueError(
+            f"No target ETF columns found. Expected: {REQUIRED_ETF_COLS}. "
+            f"Found in dataset: {list(df.columns)}"
+        )
+
+    # Input features: Z-scores, vol, regime, yield curve, credit, rates, VIX terms
+    exclude = set(target_etfs + BENCHMARK_COLS + [TBILL_COL])
+    input_features = [
+        c for c in df.columns
+        if c not in exclude
+        and (
+            c.endswith("_Z")
+            or c.endswith("_Vol")
+            or "Regime" in c
+            or "YC_" in c
+            or "Credit_" in c
+            or "Rates_" in c
+            or "VIX_" in c
+            or "Spread" in c
+            or "DXY" in c
+            or "VIX" in c
+            or "T10Y" in c
+        )
+    ]
+
+    # 3m T-bill rate (for CASH return & Sharpe)
+    tbill_rate = 0.045  # default fallback
+    if TBILL_COL in df.columns:
+        raw = df[TBILL_COL].dropna()
+        if len(raw) > 0:
+            last_val = raw.iloc[-1]
+            # DTB3 is typically in percent (e.g. 5.25 means 5.25%)
+            tbill_rate = float(last_val) / 100 if last_val > 1 else float(last_val)
+
+    return input_features, target_etfs, tbill_rate
+
+
+# ── Column info helper (for sidebar display) ──────────────────────────────────
+
+def dataset_summary(df: pd.DataFrame) -> dict:
+    """Return a brief summary dict for sidebar display."""
+    if df.empty:
+        return {}
+    return {
+        "rows": len(df),
+        "columns": len(df.columns),
+        "start_date": df.index[0].strftime("%Y-%m-%d"),
+        "end_date": df.index[-1].strftime("%Y-%m-%d"),
+        "etfs_found": [c for c in REQUIRED_ETF_COLS if c in df.columns],
+        "benchmarks": [c for c in BENCHMARK_COLS if c in df.columns],
+        "tbill_found": TBILL_COL in df.columns,
+    }
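Taken together, the loader exposes a four-call surface. A hedged usage sketch, assuming the script runs from the Space root with `HF_TOKEN` set in the environment and read access to the dataset repo (calling the `@st.cache_data`-wrapped function outside a running Streamlit app still works, but logs a cache warning):

```python
# Hypothetical driver for data/loader.py; HF_TOKEN and repo access are assumed.
import os
from data.loader import (
    load_dataset, check_data_freshness, get_features_and_targets, dataset_summary,
)

df = load_dataset(os.environ["HF_TOKEN"])   # cached parquet download
status = check_data_freshness(df)           # compares df.index[-1] to last NYSE day
print(status["message"])

features, targets, tbill = get_features_and_targets(df)
print(f"{len(features)} features, {len(targets)} targets, T-bill {tbill:.3%}")
print(dataset_summary(df))                  # the dict the sidebar renders
```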
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md CHANGED
@@ -1,19 +1,115 @@
----
-title: P2 ETF CNN LSTM ALTERNATIVE APPROACHES
-emoji: 🚀
-colorFrom: red
-colorTo: red
-sdk: docker
-app_port: 8501
-tags:
-  - streamlit
-pinned: false
-short_description: Streamlit template space
----
-
-# Welcome to Streamlit!
-
-Edit `/src/streamlit_app.py` to customize this app to your heart's desire. :heart:
-
-If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
-forums](https://discuss.streamlit.io).
+# P2-ETF-CNN-LSTM-ALTERNATIVE-APPROACHES
+
+Macro-driven ETF rotation using three augmented CNN-LSTM variants.
+Winner selected by **highest raw annualised return** on the out-of-sample test set.
+
+---
+
+## Architecture Overview
+
+| Approach | Core Idea | Key Addition |
+|---|---|---|
+| **1 — Wavelet** | DWT decomposes each macro signal into frequency subbands before the CNN | Separates trend / cycle / noise |
+| **2 — Regime-Conditioned** | HMM detects macro regimes; one-hot regime label concatenated into the network | Removes non-stationarity |
+| **3 — Multi-Scale Parallel** | Three CNN towers (kernels 3, 7, 21 days) run in parallel before the LSTM | Captures momentum + cycle + trend simultaneously |
+
+---
+
+## ETF Universe
+
+| Ticker | Description |
+|---|---|
+| TLT | 20+ Year Treasury Bond |
+| TBT | 20+ Year Treasury Short (2×) |
+| VNQ | Real Estate (REIT) |
+| SLV | Silver |
+| GLD | Gold |
+| CASH | 3m T-bill rate (from HF dataset) |
+
+Benchmarks (chart only, not traded): **SPY**, **AGG**
+
+---
+
+## Data
+
+All data sourced exclusively from:
+**`P2SAMAPA/fi-etf-macro-signal-master-data`** (HuggingFace Dataset)
+File: `master_data.parquet`
+
+No external API calls (no yfinance, no FRED).
+The app checks daily whether the prior NYSE trading day's data is present in the dataset.
+
+---
+
+## Project Structure
+
+```
+├── .github/
+│   └── workflows/
+│       └── sync.yml              # Auto-sync GitHub → HF Space on push to main
+│
+├── app.py                        # Streamlit orchestrator (UI wiring only)
+│
+├── data/
+│   └── loader.py                 # HF dataset load, freshness check, column validation
+│
+├── models/
+│   ├── base.py                   # Shared: sequences, splits, scaling, callbacks
+│   ├── approach1_wavelet.py      # Wavelet CNN-LSTM
+│   ├── approach2_regime.py       # Regime-Conditioned CNN-LSTM
+│   └── approach3_multiscale.py   # Multi-Scale Parallel CNN-LSTM
+│
+├── strategy/
+│   └── backtest.py               # execute_strategy, metrics, winner selection
+│
+├── signals/
+│   └── conviction.py             # Z-score conviction scoring
+│
+├── ui/
+│   ├── components.py             # Banner, conviction panel, metrics, audit trail
+│   └── charts.py                 # Plotly equity curve + comparison bar chart
+│
+├── utils/
+│   └── calendar.py               # NYSE calendar, next trading day, EST time
+│
+├── requirements.txt
+└── README.md
+```
+
+---
+
+## Secrets Required
+
+| Secret | Where | Purpose |
+|---|---|---|
+| `HF_TOKEN` | GitHub + HF Space | Read HF dataset · Sync HF Space |
+
+Set in:
+- GitHub: `Settings → Secrets → Actions → New repository secret`
+- HF Space: `Settings → Repository secrets`
+
+---
+
+## Deployment
+
+Push to `main` → GitHub Actions (`sync.yml`) automatically syncs to HF Space.
+
+### Local development
+
+```bash
+pip install -r requirements.txt
+export HF_TOKEN=your_token
+streamlit run app.py
+```
+
+---
+
+## Output UI
+
+1. **Data freshness warning** alerts if prior NYSE trading day data is missing
+2. **Next Trading Day Signal** — date + ETF from the winning approach
+3. **Signal Conviction** — Z-score gauge + per-ETF probability bars
+4. **Performance Metrics** — Annualised Return, Sharpe, Hit Ratio, Max DD
+5. **Approach Comparison Table** — all three approaches side by side
+6. **Equity Curves** — all three approaches + SPY + AGG benchmarks
+7. **Audit Trail** — last 20 trading days for the winning approach
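The README's winner rule ("highest raw annualised return") reduces to compounding daily net returns and annualising over 252 trading days. A toy check with invented return series:

```python
# Toy illustration of the "highest raw annualised return" winner rule.
# The three daily-return series are invented for the example.
import numpy as np

def annualised_return(daily_rets: np.ndarray) -> float:
    cum = np.prod(1 + daily_rets)              # total growth factor
    return cum ** (252 / len(daily_rets)) - 1  # geometric annualisation

results = {
    "Approach 1": np.full(126, 0.0004),   # ~10.6% annualised
    "Approach 2": np.full(126, 0.0006),   # ~16.3% annualised
    "Approach 3": np.full(126, 0.0002),   # ~5.2% annualised
}
winner = max(results, key=lambda k: annualised_return(results[k]))
print(winner)  # Approach 2
```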
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py ADDED
@@ -0,0 +1,273 @@
+"""
+app.py
+P2-ETF-CNN-LSTM-ALTERNATIVE-APPROACHES
+Streamlit orchestrator — UI wiring only, no business logic here.
+"""
+
+import os
+import streamlit as st
+import pandas as pd
+import numpy as np
+
+# ── Module imports ────────────────────────────────────────────────────────────
+from data.loader import load_dataset, check_data_freshness, get_features_and_targets, dataset_summary
+from utils.calendar import get_est_time, is_sync_window, get_next_signal_date
+from models.base import build_sequences, train_val_test_split, scale_features, returns_to_labels
+from models.approach1_wavelet import train_approach1, predict_approach1
+from models.approach2_regime import train_approach2, predict_approach2
+from models.approach3_multiscale import train_approach3, predict_approach3
+from strategy.backtest import execute_strategy, select_winner, build_comparison_table
+from signals.conviction import compute_conviction
+from ui.components import (
+    show_freshness_status, show_signal_banner, show_conviction_panel,
+    show_metrics_row, show_comparison_table, show_audit_trail,
+)
+from ui.charts import equity_curve_chart, comparison_bar_chart
+
+# ── Page config ───────────────────────────────────────────────────────────────
+st.set_page_config(
+    page_title="P2-ETF-CNN-LSTM",
+    page_icon="🧠",
+    layout="wide",
+)
+
+# ── Secrets ───────────────────────────────────────────────────────────────────
+HF_TOKEN = os.getenv("HF_TOKEN", "")
+
+# ── Sidebar ───────────────────────────────────────────────────────────────────
+with st.sidebar:
+    st.header("⚙️ Configuration")
+
+    now_est = get_est_time()
+    st.write(f"🕒 **EST:** {now_est.strftime('%H:%M:%S')}")
+    if is_sync_window():
+        st.success("✅ Sync Window Active")
+    else:
+        st.info("⏸️ Sync Window Inactive")
+
+    st.divider()
+
+    start_yr = st.slider("📅 Start Year", 2010, 2024, 2016)
+    fee_bps = st.slider("💰 Fee (bps)", 0, 50, 10)
+    lookback = st.slider("📐 Lookback (days)", 20, 60, 30, step=5)
+    epochs = st.number_input("🔁 Max Epochs", 20, 300, 100, step=10)
+
+    st.divider()
+
+    split_option = st.selectbox("📊 Train/Val/Test Split", ["70/15/15", "80/10/10"], index=0)
+    split_map = {"70/15/15": (0.70, 0.15), "80/10/10": (0.80, 0.10)}
+    train_pct, val_pct = split_map[split_option]
+
+    include_cash = st.checkbox("💵 Include CASH class", value=True,
+                               help="Model can select CASH (earns T-bill rate) as an alternative to any ETF")
+
+    st.divider()
+
+    run_button = st.button("🚀 Run All 3 Approaches", type="primary", use_container_width=True)
+
+# ── Title ─────────────────────────────────────────────────────────────────────
+st.title("🧠 P2-ETF-CNN-LSTM")
+st.caption("Approach 1: Wavelet · Approach 2: Regime-Conditioned · Approach 3: Multi-Scale Parallel")
+st.caption("Winner selected by highest raw annualised return on out-of-sample test set.")
+
+# ── Load data (always, to check freshness) ────────────────────────────────────
+if not HF_TOKEN:
+    st.error("❌ HF_TOKEN secret not found. Please add it to your HF Space / GitHub secrets.")
+    st.stop()
+
+with st.spinner("📡 Loading dataset from HuggingFace..."):
+    df = load_dataset(HF_TOKEN)
+
+if df.empty:
+    st.stop()
+
+# ── Freshness check ───────────────────────────────────────────────────────────
+freshness = check_data_freshness(df)
+show_freshness_status(freshness)
+
+# ── Dataset summary in sidebar ────────────────────────────────────────────────
+with st.sidebar:
+    st.divider()
+    st.subheader("📦 Dataset Info")
+    summary = dataset_summary(df)
+    if summary:
+        st.write(f"**Rows:** {summary['rows']:,}")
+        st.write(f"**Range:** {summary['start_date']} → {summary['end_date']}")
+        st.write(f"**ETFs:** {', '.join([e.replace('_Ret','') for e in summary['etfs_found']])}")
+        st.write(f"**Benchmarks:** {', '.join([b.replace('_Ret','') for b in summary['benchmarks']])}")
+        st.write(f"**T-bill col:** {'✅' if summary['tbill_found'] else '❌'}")
+
+# ── Main execution ────────────────────────────────────────────────────────────
+if not run_button:
+    st.info("👈 Configure parameters in the sidebar and click **🚀 Run All 3 Approaches** to begin.")
+    st.stop()
+
+# ── Filter by start year ──────────────────────────────────────────────────────
+df = df[df.index.year >= start_yr].copy()
+st.write(f"📅 **Data:** {df.index[0].strftime('%Y-%m-%d')} → {df.index[-1].strftime('%Y-%m-%d')} "
+         f"({df.index[-1].year - df.index[0].year + 1} years)")
+
+# ── Feature / target extraction ───────────────────────────────────────────────
+try:
+    input_features, target_etfs, tbill_rate = get_features_and_targets(df)
+except ValueError as e:
+    st.error(str(e))
+    st.stop()
+
+st.info(f"🎯 **Targets:** {len(target_etfs)} ETFs · **Features:** {len(input_features)} signals · "
+        f"**T-bill rate:** {tbill_rate*100:.2f}%")
+
+# ── Prepare sequences ─────────────────────────────────────────────────────────
+X_raw = df[input_features].values.astype(np.float32)
+y_raw = df[target_etfs].values.astype(np.float32)
+n_etfs = len(target_etfs)
+n_classes = n_etfs + (1 if include_cash else 0)  # +1 for CASH
+
+# Fill NaNs with column means
+col_means = np.nanmean(X_raw, axis=0)
+for j in range(X_raw.shape[1]):
+    mask = np.isnan(X_raw[:, j])
+    X_raw[mask, j] = col_means[j]
+
+X_seq, y_seq = build_sequences(X_raw, y_raw, lookback)
+y_labels = returns_to_labels(y_seq, include_cash=include_cash)
+
+X_train, y_train_r, X_val, y_val_r, X_test, y_test_r = train_val_test_split(X_seq, y_seq, train_pct, val_pct)
+_, y_train_l, _, y_val_l, _, y_test_l = train_val_test_split(X_seq, y_labels, train_pct, val_pct)
+
+X_train_s, X_val_s, X_test_s, _ = scale_features(X_train, X_val, X_test)
+
+train_size = len(X_train)
+val_size = len(X_val)
+
+# Test dates (aligned with y_test)
+test_start = lookback + train_size + val_size
+test_dates = df.index[test_start: test_start + len(X_test)]
+test_slice = slice(test_start, test_start + len(X_test))
+
+st.success(f"✅ Sequences — Train: {train_size} · Val: {val_size} · Test: {len(X_test)}")
+
+# ── Train all three approaches ────────────────────────────────────────────────
+results = {}
+trained_info = {}  # store extra info needed for conviction
+
+progress = st.progress(0, text="Starting training...")
+
+# ── Approach 1: Wavelet ───────────────────────────────────────────────────────
+with st.spinner("🌊 Training Approach 1 — Wavelet CNN-LSTM..."):
+    try:
+        model1, hist1, _ = train_approach1(
+            X_train_s, y_train_l,
+            X_val_s, y_val_l,
+            n_classes=n_classes, epochs=int(epochs),
+        )
+        preds1, proba1 = predict_approach1(model1, X_test_s)
+        results["Approach 1"] = execute_strategy(
+            preds1, proba1, y_test_r, test_dates, target_etfs, fee_bps, tbill_rate, include_cash,
+        )
+        trained_info["Approach 1"] = {"proba": proba1}
+        st.success("✅ Approach 1 complete")
+    except Exception as e:
+        st.warning(f"⚠️ Approach 1 failed: {e}")
+        results["Approach 1"] = None
+
+progress.progress(33, text="Approach 1 done...")
+
+# ── Approach 2: Regime-Conditioned ───────────────────────────────────────────
+with st.spinner("🔀 Training Approach 2 — Regime-Conditioned CNN-LSTM..."):
+    try:
+        model2, hist2, hmm2, regime_cols2 = train_approach2(
+            X_train_s, y_train_l,
+            X_val_s, y_val_l,
+            X_flat_all=X_raw,
+            feature_names=input_features,
+            lookback=lookback,
+            train_size=train_size,
+            val_size=val_size,
+            n_classes=n_classes, epochs=int(epochs),
+        )
+        preds2, proba2 = predict_approach2(
+            model2, X_test_s, X_raw, regime_cols2, hmm2,
+            lookback, train_size, val_size,
+        )
+        results["Approach 2"] = execute_strategy(
+            preds2, proba2, y_test_r, test_dates, target_etfs, fee_bps, tbill_rate, include_cash,
+        )
+        trained_info["Approach 2"] = {"proba": proba2}
+        st.success("✅ Approach 2 complete")
+    except Exception as e:
+        st.warning(f"⚠️ Approach 2 failed: {e}")
+        results["Approach 2"] = None
+
+progress.progress(66, text="Approach 2 done...")
+
+# ── Approach 3: Multi-Scale ───────────────────────────────────────────────────
+with st.spinner("📡 Training Approach 3 — Multi-Scale CNN-LSTM..."):
+    try:
+        model3, hist3 = train_approach3(
+            X_train_s, y_train_l,
+            X_val_s, y_val_l,
+            n_classes=n_classes, epochs=int(epochs),
+        )
+        preds3, proba3 = predict_approach3(model3, X_test_s)
+        results["Approach 3"] = execute_strategy(
+            preds3, proba3, y_test_r, test_dates, target_etfs, fee_bps, tbill_rate, include_cash,
+        )
+        trained_info["Approach 3"] = {"proba": proba3}
+        st.success("✅ Approach 3 complete")
+    except Exception as e:
+        st.warning(f"⚠️ Approach 3 failed: {e}")
+        results["Approach 3"] = None
+
+progress.progress(100, text="All approaches complete!")
+progress.empty()
+
+# ── Select winner ─────────────────────────────────────────────────────────────
+winner_name = select_winner(results)
+winner_res = results.get(winner_name)
+
+if winner_res is None:
+    st.error("❌ All approaches failed. Please check your data and configuration.")
+    st.stop()
+
+# ── Next trading date ─────────────────────────────────────────────────────────
+next_date = get_next_signal_date()
+
+st.divider()
+
+# ── Signal banner (winner) ────────────────────────────────────────────────────
+show_signal_banner(winner_res["next_signal"], next_date, winner_name)
+
+# ── Conviction panel ──────────────────────────────────────────────────────────
+winner_proba = trained_info[winner_name]["proba"]
+conviction = compute_conviction(winner_proba[-1], target_etfs, include_cash)
+show_conviction_panel(conviction)
+
+st.divider()
+
+# ── Winner metrics ────────────────────────────────────────────────────────────
+st.subheader(f"📊 {winner_name} — Performance Metrics")
+show_metrics_row(winner_res, tbill_rate)
+
+st.divider()
+
+# ── Comparison table ──────────────────────────────────────────────────────────
+st.subheader("🏆 Approach Comparison (Winner = Highest Raw Annualised Return)")
+comparison_df = build_comparison_table(results, winner_name)
+show_comparison_table(comparison_df)
+
+# ── Comparison bar chart ──────────────────────────────────────────────────────
+st.plotly_chart(comparison_bar_chart(results, winner_name), use_container_width=True)
+
+st.divider()
+
+# ── Equity curves ─────────────────────────────────────────────────────────────
+st.subheader("📈 Out-of-Sample Equity Curves — All Approaches vs Benchmarks")
+fig = equity_curve_chart(results, winner_name, test_dates, df, test_slice, tbill_rate)
+st.plotly_chart(fig, use_container_width=True)
+
+st.divider()
+
+# ── Audit trail (winner) ──────────────────────────────────────────────────────
+st.subheader(f"📋 Audit Trail — {winner_name} (Last 20 Trading Days)")
+show_audit_trail(winner_res["audit_trail"])
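The shape arithmetic around `build_sequences` and `train_val_test_split` is easy to get wrong, so here is a small sketch with invented dimensions showing why `test_start = lookback + train_size + val_size` lines the test window up with the original DataFrame rows:

```python
# Sketch of the sequence/split shape arithmetic used in app.py
# (dimensions invented; windowing mirrors models.base.build_sequences).
import numpy as np

n_days, n_features, n_etfs, lookback = 1000, 12, 5, 30
X_raw = np.random.randn(n_days, n_features).astype(np.float32)
y_raw = (np.random.randn(n_days, n_etfs) * 0.01).astype(np.float32)

# Sample k uses rows [k, k+lookback) as input and row k+lookback as target
X_seq = np.stack([X_raw[i - lookback:i] for i in range(lookback, n_days)])
y_seq = y_raw[lookback:]
assert X_seq.shape == (970, 30, 12) and y_seq.shape == (970, 5)

t1, t2 = int(970 * 0.70), int(970 * (0.70 + 0.15))   # 70/15/15 split points
test_start = lookback + t2                            # first test row in the frame
print(X_seq[:t1].shape, X_seq[t1:t2].shape, X_seq[t2:].shape, test_start)
# (679, 30, 12) (145, 30, 12) (146, 30, 12) 854
```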
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitattributes ADDED
@@ -0,0 +1,35 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/Dockerfile ADDED
@@ -0,0 +1,20 @@
+FROM python:3.13.5-slim
+
+WORKDIR /app
+
+RUN apt-get update && apt-get install -y \
+    build-essential \
+    curl \
+    git \
+    && rm -rf /var/lib/apt/lists/*
+
+COPY requirements.txt ./
+COPY src/ ./src/
+
+RUN pip3 install -r requirements.txt
+
+EXPOSE 8501
+
+HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
+
+ENTRYPOINT ["streamlit", "run", "src/streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0"]
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md ADDED
@@ -0,0 +1,19 @@
+---
+title: P2 ETF CNN LSTM ALTERNATIVE APPROACHES
+emoji: 🚀
+colorFrom: red
+colorTo: red
+sdk: docker
+app_port: 8501
+tags:
+  - streamlit
+pinned: false
+short_description: Streamlit template space
+---
+
+# Welcome to Streamlit!
+
+Edit `/src/streamlit_app.py` to customize this app to your heart's desire. :heart:
+
+If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
+forums](https://discuss.streamlit.io).
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/requirements.txt ADDED
@@ -0,0 +1,3 @@
+altair
+pandas
+streamlit
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/src/streamlit_app.py ADDED
@@ -0,0 +1,40 @@
+import altair as alt
+import numpy as np
+import pandas as pd
+import streamlit as st
+
+"""
+# Welcome to Streamlit!
+
+Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
+If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
+forums](https://discuss.streamlit.io).
+
+In the meantime, below is an example of what you can do with just a few lines of code:
+"""
+
+num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
+num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
+
+indices = np.linspace(0, 1, num_points)
+theta = 2 * np.pi * num_turns * indices
+radius = indices
+
+x = radius * np.cos(theta)
+y = radius * np.sin(theta)
+
+df = pd.DataFrame({
+    "x": x,
+    "y": y,
+    "idx": indices,
+    "rand": np.random.randn(num_points),
+})
+
+st.altair_chart(alt.Chart(df, height=700, width=700)
+    .mark_point(filled=True)
+    .encode(
+        x=alt.X("x", axis=None),
+        y=alt.Y("y", axis=None),
+        color=alt.Color("idx", legend=None, scale=alt.Scale()),
+        size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
+    ))
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/requirements.txt CHANGED
@@ -1,3 +1,29 @@
-altair
-pandas
-streamlit
+# Core
+streamlit>=1.32.0
+pandas>=2.0.0
+numpy>=1.24.0
+
+# Hugging Face
+huggingface_hub>=0.21.0
+datasets>=2.18.0
+
+# Machine Learning
+tensorflow>=2.14.0
+scikit-learn>=1.3.0
+xgboost>=2.0.0
+
+# Wavelet (Approach 1)
+PyWavelets>=1.5.0
+
+# Regime detection (Approach 2)
+hmmlearn>=0.3.0
+
+# Visualisation
+plotly>=5.18.0
+
+# NYSE Calendar
+pandas_market_calendars>=4.3.0
+pytz>=2024.1
+
+# Parquet
+pyarrow>=14.0.0
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/base.py ADDED
@@ -0,0 +1,199 @@
+"""
+models/base.py
+Shared utilities for all three CNN-LSTM variants:
+- Data preparation (sequences, train/val/test split)
+- Common Keras layers / callbacks
+- Predict + evaluate helpers
+"""
+
+import numpy as np
+import pandas as pd
+from sklearn.preprocessing import RobustScaler
+import tensorflow as tf
+from tensorflow import keras
+
+# ── Reproducibility ───────────────────────────────────────────────────────────
+SEED = 42
+tf.random.set_seed(SEED)
+np.random.seed(SEED)
+
+
+# ── Sequence builder ──────────────────────────────────────────────────────────
+
+def build_sequences(features: np.ndarray, targets: np.ndarray, lookback: int):
+    """
+    Build supervised sequences for CNN-LSTM input.
+
+    Args:
+        features : 2-D array [n_days, n_features]
+        targets  : 2-D array [n_days, n_etfs] (raw returns)
+        lookback : number of past days per sample
+
+    Returns:
+        X : [n_samples, lookback, n_features]
+        y : [n_samples, n_etfs] (raw returns for the next day)
+    """
+    X, y = [], []
+    for i in range(lookback, len(features)):
+        X.append(features[i - lookback: i])
+        y.append(targets[i])
+    return np.array(X, dtype=np.float32), np.array(y, dtype=np.float32)
+
+
+# ── Train / val / test split ──────────────────────────────────────────────────
+
+def train_val_test_split(X, y, train_pct=0.70, val_pct=0.15):
+    """Split sequences into train / val / test preserving temporal order."""
+    n = len(X)
+    t1 = int(n * train_pct)
+    t2 = int(n * (train_pct + val_pct))
+
+    return (
+        X[:t1], y[:t1],
+        X[t1:t2], y[t1:t2],
+        X[t2:], y[t2:],
+    )
+
+
+# ── Feature scaling ───────────────────────────────────────────────────────────
+
+def scale_features(X_train, X_val, X_test):
+    """
+    Fit RobustScaler on training data only, apply to val and test.
+    Operates on the flattened feature dimension.
+
+    Returns scaled arrays with same shape as inputs.
+    """
+    n_train, lb, n_feat = X_train.shape
+    scaler = RobustScaler()
+
+    # Fit on train
+    scaler.fit(X_train.reshape(-1, n_feat))
+
+    def _transform(X):
+        shape = X.shape
+        return scaler.transform(X.reshape(-1, n_feat)).reshape(shape)
+
+    return _transform(X_train), _transform(X_val), _transform(X_test), scaler
+
+
+# ── Label builder (classification: argmax of returns) ────────────────────────
+
+def returns_to_labels(y_raw, include_cash=True, cash_threshold=0.0):
+    """
+    Convert raw return matrix to integer class labels.
+
+    If include_cash=True, adds a CASH class (index = n_etfs) when
+    the best ETF return is below cash_threshold.
+
+    Args:
+        y_raw          : [n_samples, n_etfs]
+        include_cash   : whether to allow CASH class
+        cash_threshold : minimum ETF return to prefer over CASH
+
+    Returns:
+        labels : [n_samples] integer class indices
+    """
+    best = np.argmax(y_raw, axis=1)
+    if include_cash:
+        best_return = y_raw[np.arange(len(y_raw)), best]
+        cash_idx = y_raw.shape[1]
+        labels = np.where(best_return < cash_threshold, cash_idx, best)
+    else:
+        labels = best
+    return labels.astype(np.int32)
+
+
+# ── Common Keras callbacks ────────────────────────────────────────────────────
+
+def get_callbacks(patience_es=15, patience_lr=8, min_lr=1e-6):
+    """Standard early stopping + reduce-LR callbacks shared by all models."""
+    return [
+        keras.callbacks.EarlyStopping(
+            monitor="val_loss",
+            patience=patience_es,
+            restore_best_weights=True,
+            verbose=0,
+        ),
+        keras.callbacks.ReduceLROnPlateau(
+            monitor="val_loss",
+            factor=0.5,
+            patience=patience_lr,
+            min_lr=min_lr,
+            verbose=0,
+        ),
+    ]
+
+
+# ── Common output head ────────────────────────────────────────────────────────
+
+def classification_head(x, n_classes: int, dropout: float = 0.3):
+    """
+    Shared dense output head for all three CNN-LSTM variants.
+
+    Args:
+        x         : input tensor
+        n_classes : number of ETF classes (+ 1 for CASH if applicable)
+        dropout   : dropout rate
+
+    Returns:
+        output tensor with softmax activation
+    """
+    x = keras.layers.Dense(64, activation="relu")(x)
+    x = keras.layers.Dropout(dropout)(x)
+    x = keras.layers.Dense(n_classes, activation="softmax")(x)
+    return x
+
+
+# ── Prediction helper ─────────────────────────────────────────────────────────
+
+def predict_classes(model, X_test: np.ndarray) -> tuple:
+    """Return (integer class predictions, softmax probabilities) from a Keras model."""
+    proba = model.predict(X_test, verbose=0)
+    return np.argmax(proba, axis=1), proba
+
+
+# ── Metrics helper ────────────────────────────────────────────────────────────
+
+def evaluate_returns(
+    preds: np.ndarray,
+    proba: np.ndarray,
+    y_raw_test: np.ndarray,
+    target_etfs: list,
+    tbill_rate: float,
+    fee_bps: int,
+    include_cash: bool = True,
+):
+    """
+    Given integer class predictions and raw return matrix,
+    compute strategy returns and summary metrics.
+
+    Returns:
+        strat_rets  : np.ndarray of daily net returns
+        ann_return  : annualised return (float)
+        cum_returns : cumulative return series
+        last_proba  : probability vector for the last prediction
+        next_etf    : name of ETF predicted for next session
+    """
+    n_etfs = len(target_etfs)
+    strat_rets = []
+
+    for i, cls in enumerate(preds):
+        if include_cash and cls == n_etfs:
+            # CASH: earn daily T-bill rate
+            daily_tbill = tbill_rate / 252
+            net = daily_tbill - (fee_bps / 10000)
+        else:
+            ret = y_raw_test[i][cls]
+            net = ret - (fee_bps / 10000)
+        strat_rets.append(net)
+
+    strat_rets = np.array(strat_rets)
+    cum_returns = np.cumprod(1 + strat_rets)
+    ann_return = (cum_returns[-1] ** (252 / len(strat_rets))) - 1
+
+    last_proba = proba[-1]
+    next_cls = int(np.argmax(last_proba))
+    next_etf = "CASH" if (include_cash and next_cls == n_etfs) else target_etfs[next_cls].replace("_Ret", "")
+
+    return strat_rets, ann_return, cum_returns, last_proba, next_etf
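A quick check of `returns_to_labels` behaviour with invented returns: the CASH class (index `n_etfs`) is assigned whenever even the best ETF return for a day falls below `cash_threshold`:

```python
# Tiny check of returns_to_labels with two invented days of ETF returns.
import numpy as np
from models.base import returns_to_labels

y = np.array([
    [ 0.010, -0.002,  0.004],   # best ETF is idx 0 and positive -> label 0
    [-0.003, -0.001, -0.006],   # best ETF is idx 1 but negative -> CASH (3)
])
print(returns_to_labels(y, include_cash=True))    # [0 3]
print(returns_to_labels(y, include_cash=False))   # [0 1]
```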
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach1_wavelet.py ADDED
@@ -0,0 +1,167 @@
+"""
+models/approach1_wavelet.py
+Approach 1: Wavelet Decomposition CNN-LSTM
+
+Pipeline:
+    Raw macro signals
+    → DWT (db4, level=3) per signal → multi-band channel stack
+    → 1D CNN (64 filters, k=3) → MaxPool → (32 filters, k=3)
+    → LSTM (128 units)
+    → Dense 64 → Softmax (n_etfs + 1 CASH)
+"""
+
+import numpy as np
+import pywt
+import tensorflow as tf
+from tensorflow import keras
+from models.base import classification_head, get_callbacks
+
+WAVELET = "db4"
+LEVEL = 3
+
+
+# ── Wavelet feature engineering ───────────────────────────────────────────────
+
+def _wavelet_decompose_signal(signal: np.ndarray, wavelet: str, level: int) -> np.ndarray:
+    """
+    Decompose a 1-D signal into DWT subbands and return them stacked.
+
+    For a signal of length T:
+        coeffs = [cA_n, cD_n, cD_{n-1}, ..., cD_1]
+    We interpolate each subband back to length T so we can stack them.
+
+    Returns: array of shape [T, level+1]
+    """
+    T = len(signal)
+    coeffs = pywt.wavedec(signal, wavelet, level=level)
+    bands = []
+    for c in coeffs:
+        # Interpolate back to original length
+        band = np.interp(
+            np.linspace(0, len(c) - 1, T),
+            np.arange(len(c)),
+            c,
+        )
+        bands.append(band)
+    return np.stack(bands, axis=-1)  # [T, level+1]
+
+
+def apply_wavelet_transform(X: np.ndarray, wavelet: str = WAVELET, level: int = LEVEL) -> np.ndarray:
+    """
+    Apply DWT to every feature channel across all samples.
+
+    Args:
+        X : [n_samples, lookback, n_features]
+
+    Returns:
+        X_wt : [n_samples, lookback, n_features * (level+1)]
+    """
+    n_samples, lookback, n_features = X.shape
+    n_bands = level + 1
+    X_wt = np.zeros((n_samples, lookback, n_features * n_bands), dtype=np.float32)
+
+    for s in range(n_samples):
+        for f in range(n_features):
+            decomposed = _wavelet_decompose_signal(X[s, :, f], wavelet, level)  # [T, n_bands]
+            start = f * n_bands
+            X_wt[s, :, start: start + n_bands] = decomposed
+
+    return X_wt
+
+
+# ── Model builder ─────────────────────────────────────────────────────────────
+
+def build_wavelet_cnn_lstm(
+    input_shape: tuple,
+    n_classes: int,
+    dropout: float = 0.3,
+    lstm_units: int = 128,
+) -> keras.Model:
+    """
+    Build Wavelet CNN-LSTM model.
+
+    Args:
+        input_shape : (lookback, n_features * n_bands) — post-DWT shape
+        n_classes   : number of output classes (ETFs + CASH)
+        dropout     : dropout rate
+        lstm_units  : LSTM hidden size
+
+    Returns:
+        Compiled Keras model
+    """
+    inputs = keras.Input(shape=input_shape, name="wavelet_input")
+
+    # CNN block 1
+    x = keras.layers.Conv1D(64, kernel_size=3, padding="causal", activation="relu")(inputs)
+    x = keras.layers.BatchNormalization()(x)
+    x = keras.layers.MaxPooling1D(pool_size=2)(x)
+
+    # CNN block 2
+    x = keras.layers.Conv1D(32, kernel_size=3, padding="causal", activation="relu")(x)
+    x = keras.layers.BatchNormalization()(x)
+    x = keras.layers.Dropout(dropout)(x)
+
+    # LSTM
+    x = keras.layers.LSTM(lstm_units, dropout=dropout, recurrent_dropout=0.1)(x)
+
+    # Output head
+    outputs = classification_head(x, n_classes, dropout)
+
+    model = keras.Model(inputs, outputs, name="Approach1_Wavelet_CNN_LSTM")
+    model.compile(
+        optimizer=keras.optimizers.Adam(learning_rate=1e-3),
+        loss="sparse_categorical_crossentropy",
+        metrics=["accuracy"],
+    )
+    return model
+
+
+# ── Full train pipeline ───────────────────────────────────────────────────────
+
+def train_approach1(
+    X_train, y_train,
+    X_val, y_val,
+    n_classes: int,
+    epochs: int = 100,
+    batch_size: int = 32,
+    dropout: float = 0.3,
+    lstm_units: int = 128,
+):
+    """
+    Apply wavelet transform then train the CNN-LSTM.
+
+    Args:
+        X_train/val : [n, lookback, n_features] (scaled, pre-wavelet)
+        y_train/val : [n] integer class labels
+        n_classes   : total output classes
+
+    Returns:
+        model    : trained Keras model
+        history  : training history
+        wt_shape : post-DWT input shape (for inference)
+    """
+    # Apply DWT
+    X_train_wt = apply_wavelet_transform(X_train)
+    X_val_wt = apply_wavelet_transform(X_val)
+
+    input_shape = X_train_wt.shape[1:]  # (lookback, n_features * n_bands)
+    model = build_wavelet_cnn_lstm(input_shape, n_classes, dropout, lstm_units)
+
+    history = model.fit(
+        X_train_wt, y_train,
+        validation_data=(X_val_wt, y_val),
+        epochs=epochs,
+        batch_size=batch_size,
+        callbacks=get_callbacks(),
+        verbose=0,
+    )
+
+    return model, history, input_shape
+
+
+def predict_approach1(model, X_test: np.ndarray) -> tuple:
+    """Apply DWT to test set then predict. Returns (class_preds, proba)."""
+    X_test_wt = apply_wavelet_transform(X_test)
+    proba = model.predict(X_test_wt, verbose=0)
+    preds = np.argmax(proba, axis=1)
+    return preds, proba
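The interpolation trick in `_wavelet_decompose_signal` is what lets subbands of different lengths stack into one `[T, level+1]` block. A shape check with an invented 60-day signal (level 3 is valid for db4 at this length; at the app's shorter lookbacks PyWavelets may emit a boundary-level warning but still decompose):

```python
# Shape check for the wavelet stack: db4 at level 3 yields 4 subbands
# per feature, each interpolated back to the lookback length T.
import numpy as np
import pywt

T = 60  # hypothetical lookback
signal = np.sin(np.linspace(0, 6 * np.pi, T)) + 0.1 * np.random.randn(T)

coeffs = pywt.wavedec(signal, "db4", level=3)   # [cA3, cD3, cD2, cD1]
print([len(c) for c in coeffs])                 # subband lengths shrink per level

bands = [np.interp(np.linspace(0, len(c) - 1, T), np.arange(len(c)), c)
         for c in coeffs]
stacked = np.stack(bands, axis=-1)
print(stacked.shape)                            # (60, 4): one [T, level+1] block per feature
```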
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach2_regime.py ADDED
@@ -0,0 +1 @@
+
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach3_multiscale.py ADDED
@@ -0,0 +1,150 @@
+"""
+models/approach3_multiscale.py
+Approach 3: Multi-Scale Parallel CNN-LSTM
+
+Pipeline:
+    Raw macro signals
+    → 3 parallel CNN towers: kernel 3 (short), 7 (medium), 21 (long)
+    → Concatenate [96 features]
+    → LSTM (128 units)
+    → Dense 64 → Softmax (n_etfs + 1 CASH)
+"""
+
+import numpy as np
+import tensorflow as tf
+from tensorflow import keras
+from models.base import classification_head, get_callbacks
+
+# Kernel sizes represent: momentum (3d), weekly cycle (7d), monthly trend (21d)
+KERNEL_SIZES = [3, 7, 21]
+FILTERS_EACH = 32  # 32 × 3 towers = 96 concatenated features
+
+
+# ── Model builder ─────────────────────────────────────────────────────────────
+
+def build_multiscale_cnn_lstm(
+    input_shape: tuple,
+    n_classes: int,
+    kernel_sizes: list = None,
+    filters: int = FILTERS_EACH,
+    dropout: float = 0.3,
+    lstm_units: int = 128,
+) -> keras.Model:
+    """
+    Multi-scale parallel CNN-LSTM.
+
+    Three CNN towers with different kernel sizes run in parallel on the
+    same input, capturing momentum, weekly cycle, and monthly trend
+    simultaneously. Their outputs are concatenated before the LSTM.
+
+    Args:
+        input_shape  : (lookback, n_features)
+        n_classes    : number of output classes (ETFs + CASH)
+        kernel_sizes : list of kernel sizes for each tower
+        filters      : number of Conv1D filters per tower
+        dropout      : dropout rate
+        lstm_units   : LSTM hidden size
+
+    Returns:
+        Compiled Keras model
+    """
+    if kernel_sizes is None:
+        kernel_sizes = KERNEL_SIZES
+
+    inputs = keras.Input(shape=input_shape, name="multiscale_input")
+
+    towers = []
+    for k in kernel_sizes:
+        # Each tower: Conv → BN → Conv → BN → Dropout
+        t = keras.layers.Conv1D(
+            filters, kernel_size=k, padding="causal", activation="relu",
+            name=f"conv1_k{k}"
+        )(inputs)
+        t = keras.layers.BatchNormalization(name=f"bn1_k{k}")(t)
+        t = keras.layers.Conv1D(
+            filters, kernel_size=k, padding="causal", activation="relu",
+            name=f"conv2_k{k}"
+        )(t)
+        t = keras.layers.BatchNormalization(name=f"bn2_k{k}")(t)
+        t = keras.layers.Dropout(dropout, name=f"drop_k{k}")(t)
+        towers.append(t)
+
+    # Concatenate along the feature dimension — keeps temporal axis intact for LSTM
+    if len(towers) > 1:
+        merged = keras.layers.Concatenate(axis=-1, name="tower_concat")(towers)
+    else:
+        merged = towers[0]
+
+    # LSTM integrates multi-scale temporal features
+    x = keras.layers.LSTM(lstm_units, dropout=dropout, recurrent_dropout=0.1, name="lstm")(merged)
+
+    # Output head
+    outputs = classification_head(x, n_classes, dropout)
+
+    model = keras.Model(inputs, outputs, name="Approach3_MultiScale_CNN_LSTM")
+    model.compile(
+        optimizer=keras.optimizers.Adam(learning_rate=1e-3),
+        loss="sparse_categorical_crossentropy",
+        metrics=["accuracy"],
+    )
+    return model
+
+
+# ── Full train pipeline ───────────────────────────────────────────────────────
+
+def train_approach3(
+    X_train, y_train,
+    X_val, y_val,
+    n_classes: int,
+    epochs: int = 100,
+    batch_size: int = 32,
+    dropout: float = 0.3,
+    lstm_units: int = 128,
+    kernel_sizes: list = None,
+):
+    """
+    Build and train the multi-scale CNN-LSTM.
+
+    Args:
+        X_train/val : [n, lookback, n_features]
+        y_train/val : [n] integer class labels
+        n_classes   : total output classes
+
+    Returns:
+        model   : trained Keras model
+        history : training history
+    """
+    if kernel_sizes is None:
+        kernel_sizes = KERNEL_SIZES
+
+    # Guard: lookback must be >= largest kernel
+    lookback = X_train.shape[1]
+    valid_kernels = [k for k in kernel_sizes if k <= lookback]
+    if not valid_kernels:
+        valid_kernels = [min(3, lookback)]
+
+    model = build_multiscale_cnn_lstm(
+        input_shape=X_train.shape[1:],
+        n_classes=n_classes,
+        kernel_sizes=valid_kernels,
+        dropout=dropout,
+        lstm_units=lstm_units,
+    )
+
+    history = model.fit(
+        X_train, y_train,
+        validation_data=(X_val, y_val),
+        epochs=epochs,
+        batch_size=batch_size,
+        callbacks=get_callbacks(),
+        verbose=0,
+    )
+
+    return model, history
+
+
+def predict_approach3(model, X_test: np.ndarray) -> tuple:
+    """Predict on test set. Returns (class_preds, proba)."""
+    proba = model.predict(X_test, verbose=0)
+    preds = np.argmax(proba, axis=1)
+    return preds, proba
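Because every tower uses `padding="causal"`, each preserves the temporal axis, so concatenating along the channel axis yields 32 x 3 = 96 features per time step. A minimal shape check with hypothetical dimensions (assumes TensorFlow is installed and the script runs from the project root):

```python
# Hypothetical shape check for the three-tower concatenation.
from models.approach3_multiscale import build_multiscale_cnn_lstm

model = build_multiscale_cnn_lstm(input_shape=(30, 12), n_classes=6)
concat = model.get_layer("tower_concat").output
print(concat.shape)        # (None, 30, 96): causal padding keeps all 30 steps
print(model.output_shape)  # (None, 6): five ETFs plus CASH
```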
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/strategy/backtest.py ADDED
@@ -0,0 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ strategy/backtest.py
3
+ Strategy execution, performance metrics, and benchmark calculations.
4
+ Supports CASH as a class (earns T-bill rate when selected).
5
+ """
6
+
7
+ import numpy as np
8
+ import pandas as pd
9
+ from datetime import datetime
10
+
11
+
12
+ # ── Strategy execution ────────────────────────────────────────────────────────
13
+
14
+ def execute_strategy(
15
+ preds: np.ndarray,
16
+     proba: np.ndarray,
+     y_raw_test: np.ndarray,
+     test_dates: pd.DatetimeIndex,
+     target_etfs: list,
+     fee_bps: int,
+     tbill_rate: float,
+     include_cash: bool = True,
+ ) -> dict:
+     """
+     Execute strategy from model predictions.
+
+     Args:
+         preds       : [n] integer class predictions
+         proba       : [n, n_classes] softmax probabilities
+         y_raw_test  : [n, n_etfs] actual next-day ETF returns
+         test_dates  : DatetimeIndex aligned with y_raw_test
+         target_etfs : list of ETF return column names, e.g. ["TLT_Ret", ...]
+         fee_bps     : transaction fee in basis points
+         tbill_rate  : annualised 3-month T-bill rate (e.g. 0.045)
+         include_cash: whether CASH is a valid class (index = n_etfs)
+
+     Returns:
+         dict with keys:
+             strat_rets, cum_returns, ann_return, sharpe,
+             hit_ratio, max_dd, max_daily_dd, cum_max,
+             audit_trail, next_signal, next_proba
+     """
+     n_etfs = len(target_etfs)
+     daily_tbill = tbill_rate / 252
+     today = datetime.now().date()
+
+     strat_rets = []
+     audit_trail = []
+
+     for i, cls in enumerate(preds):
+         if include_cash and cls == n_etfs:
+             signal_etf = "CASH"
+             realized_ret = daily_tbill
+         else:
+             cls = min(cls, n_etfs - 1)
+             signal_etf = target_etfs[cls].replace("_Ret", "")
+             realized_ret = float(y_raw_test[i][cls])
+
+         net_ret = realized_ret - (fee_bps / 10000)
+         strat_rets.append(net_ret)
+
+         trade_date = test_dates[i]
+         if trade_date.date() < today:
+             audit_trail.append({
+                 "Date": trade_date.strftime("%Y-%m-%d"),
+                 "Signal": signal_etf,
+                 "Realized": realized_ret,
+                 "Net_Return": net_ret,
+             })
+
+     strat_rets = np.array(strat_rets, dtype=np.float64)
+
+     # Next signal (last prediction)
+     last_cls = int(preds[-1])
+     next_proba = proba[-1]
+
+     if include_cash and last_cls == n_etfs:
+         next_signal = "CASH"
+     else:
+         last_cls = min(last_cls, n_etfs - 1)
+         next_signal = target_etfs[last_cls].replace("_Ret", "")
+
+     metrics = _compute_metrics(strat_rets, tbill_rate)
+
+     return {
+         **metrics,
+         "strat_rets": strat_rets,
+         "audit_trail": audit_trail,
+         "next_signal": next_signal,
+         "next_proba": next_proba,
+     }
+
+
+ # ── Performance metrics ───────────────────────────────────────────────────────
+
+ def _compute_metrics(strat_rets: np.ndarray, tbill_rate: float) -> dict:
+     if len(strat_rets) == 0:
+         return {}
+
+     cum_returns = np.cumprod(1 + strat_rets)
+     n = len(strat_rets)
+     ann_return = float(cum_returns[-1] ** (252 / n) - 1)
+
+     excess = strat_rets - tbill_rate / 252
+     sharpe = float(np.mean(excess) / (np.std(strat_rets) + 1e-9) * np.sqrt(252))
+
+     recent = strat_rets[-15:]
+     hit_ratio = float(np.mean(recent > 0))
+
+     cum_max = np.maximum.accumulate(cum_returns)
+     drawdown = (cum_returns - cum_max) / cum_max
+     max_dd = float(np.min(drawdown))
+     max_daily = float(np.min(strat_rets))
+
+     return {
+         "cum_returns": cum_returns,
+         "ann_return": ann_return,
+         "sharpe": sharpe,
+         "hit_ratio": hit_ratio,
+         "max_dd": max_dd,
+         "max_daily_dd": max_daily,
+         "cum_max": cum_max,
+     }
+
+
+ def compute_benchmark_metrics(returns: np.ndarray, tbill_rate: float) -> dict:
+     """Compute metrics for a benchmark return series."""
+     return _compute_metrics(returns, tbill_rate)
+
+
+ # ── Winner selection ──────────────────────────────────────────────────────────
+
+ def select_winner(results: dict) -> str:
+     """
+     Given a dict of {approach_name: result_dict}, return the approach name
+     with the highest annualised return (raw, not risk-adjusted).
+
+     Args:
+         results : {"Approach 1": {...}, "Approach 2": {...}, "Approach 3": {...}}
+
+     Returns:
+         winner_name : str
+     """
+     best_name = None
+     best_return = -np.inf
+
+     for name, res in results.items():
+         if res is None:
+             continue
+         ret = res.get("ann_return", -np.inf)
+         if ret > best_return:
+             best_return = ret
+             best_name = name
+
+     return best_name
+
+
+ # ── Comparison table ──────────────────────────────────────────────────────────
+
+ def build_comparison_table(results: dict, winner_name: str) -> pd.DataFrame:
+     """
+     Build a summary DataFrame comparing all three approaches.
+
+     Args:
+         results     : {name: result_dict}
+         winner_name : name of the winning approach
+
+     Returns:
+         pd.DataFrame with one row per approach
+     """
+     rows = []
+     for name, res in results.items():
+         if res is None:
+             rows.append({
+                 "Approach": name,
+                 "Ann. Return": "N/A",
+                 "Sharpe": "N/A",
+                 "Hit Ratio (15d)": "N/A",
+                 "Max Drawdown": "N/A",
+                 "Winner": "",
+             })
+             continue
+
+         rows.append({
+             "Approach": name,
+             "Ann. Return": f"{res['ann_return']*100:.2f}%",
+             "Sharpe": f"{res['sharpe']:.2f}",
+             "Hit Ratio (15d)": f"{res['hit_ratio']*100:.0f}%",
+             "Max Drawdown": f"{res['max_dd']*100:.2f}%",
+             "Winner": "⭐ WINNER" if name == winner_name else "",
+         })
+
+     return pd.DataFrame(rows)
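For orientation, a minimal sketch of how these pieces chain together; the toy predictions, probabilities, and returns below are made up for illustration, not taken from the app:

    import numpy as np
    import pandas as pd

    # Hypothetical test window: 5 days, 2 ETFs plus CASH (class index 2).
    preds = np.array([0, 1, 2, 0, 1])
    proba = np.full((5, 3), 1 / 3)                           # flat, low-conviction probabilities
    y_raw = np.random.default_rng(0).normal(0, 0.01, (5, 2))  # fake next-day ETF returns
    dates = pd.bdate_range("2024-01-02", periods=5)

    res = run_strategy(preds, proba, y_raw, dates,
                       target_etfs=["TLT_Ret", "SPY_Ret"],
                       fee_bps=2, tbill_rate=0.045)
    winner = select_winner({"Approach 1": res})
    print(build_comparison_table({"Approach 1": res}, winner))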
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/signals/conviction.py ADDED
@@ -0,0 +1,93 @@
+ """
+ signals/conviction.py
+ Signal conviction scoring via Z-score of model probabilities.
+ """
+
+ import numpy as np
+
+
+ CONVICTION_THRESHOLDS = {
+     "Very High": 2.0,
+     "High": 1.0,
+     "Moderate": 0.0,
+     # Below 0.0 → "Low"
+ }
+
+
+ def compute_conviction(proba: np.ndarray, target_etfs: list, include_cash: bool = True) -> dict:
+     """
+     Compute Z-score conviction for the selected signal.
+
+     Args:
+         proba       : 1-D softmax probability vector [n_classes]
+         target_etfs : list of ETF return column names (e.g. ["TLT_Ret", ...])
+         include_cash: whether CASH is the last class
+
+     Returns:
+         dict with keys:
+             best_idx     : int
+             best_name    : str (ETF ticker or "CASH")
+             z_score      : float
+             label        : str ("Very High" / "High" / "Moderate" / "Low")
+             scores       : np.ndarray (raw proba)
+             etf_names    : list of display names
+             sorted_pairs : list of (name, score) sorted high→low
+     """
+     scores = np.array(proba, dtype=float)
+     best_idx = int(np.argmax(scores))
+     n_etfs = len(target_etfs)
+
+     # Display names
+     etf_names = [e.replace("_Ret", "") for e in target_etfs]
+     if include_cash:
+         etf_names = etf_names + ["CASH"]
+
+     best_name = etf_names[best_idx] if best_idx < len(etf_names) else "CASH"
+
+     # Z-score of the winning probability against all class probabilities
+     mean = np.mean(scores)
+     std = np.std(scores)
+     z = float((scores[best_idx] - mean) / std) if std > 1e-9 else 0.0
+
+     # Label: thresholds are checked from highest to lowest (insertion order)
+     label = "Low"
+     for lbl, threshold in CONVICTION_THRESHOLDS.items():
+         if z >= threshold:
+             label = lbl
+             break
+
+     # Sorted pairs for UI bar chart
+     sorted_pairs = sorted(
+         zip(etf_names, scores),
+         key=lambda x: x[1],
+         reverse=True,
+     )
+
+     return {
+         "best_idx": best_idx,
+         "best_name": best_name,
+         "z_score": z,
+         "label": label,
+         "scores": scores,
+         "etf_names": etf_names,
+         "sorted_pairs": sorted_pairs,
+     }
+
+
+ def conviction_color(label: str) -> str:
+     """Return hex accent colour for a conviction label."""
+     return {
+         "Very High": "#00b894",
+         "High": "#00cec9",
+         "Moderate": "#fdcb6e",
+         "Low": "#d63031",
+     }.get(label, "#888888")
+
+
+ def conviction_icon(label: str) -> str:
+     """Return a traffic-light icon for a conviction label."""
+     return {
+         "Very High": "🟢",
+         "High": "🟢",
+         "Moderate": "🟡",
+         "Low": "🔴",
+     }.get(label, "⚪")
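The Z-score arithmetic is easy to verify by hand. A small sketch against compute_conviction as defined above (the probability vector is invented):

    import numpy as np

    proba = np.array([0.55, 0.20, 0.15, 0.10])   # TLT, IEF, SPY, CASH
    out = compute_conviction(proba, ["TLT_Ret", "IEF_Ret", "SPY_Ret"])

    # mean = 0.25, std ≈ 0.1768, so z ≈ (0.55 - 0.25) / 0.1768 ≈ 1.70 → "High"
    print(out["best_name"], round(out["z_score"], 2), out["label"])   # TLT 1.7 High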
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/ui/components.py ADDED
@@ -0,0 +1,229 @@
+ """
+ ui/components.py
+ Reusable Streamlit UI blocks:
+ - Freshness warning banner
+ - Next trading day signal banner
+ - Signal conviction panel
+ - Metrics row
+ - Audit trail table
+ - Comparison summary table
+ """
+
+ import streamlit as st
+ import pandas as pd
+ import numpy as np
+
+ from signals.conviction import conviction_color, conviction_icon
+
+
+ # ── Freshness warning ─────────────────────────────────────────────────────────
+
+ def show_freshness_status(freshness: dict):
+     """Display data freshness status: success banner if fresh, warning otherwise."""
+     if freshness.get("fresh"):
+         st.success(freshness["message"])
+     else:
+         st.warning(freshness["message"])
+
+
+ # ── Next trading day banner ───────────────────────────────────────────────────
+
+ def show_signal_banner(next_signal: str, next_date, approach_name: str):
+     """Large coloured banner showing the winning approach's next signal."""
+     is_cash = next_signal == "CASH"
+     bg = "linear-gradient(135deg, #2d3436 0%, #1a1a2e 100%)" if is_cash else \
+          "linear-gradient(135deg, #00d1b2 0%, #00a896 100%)"
+
+     st.markdown(f"""
+     <div style="background:{bg}; padding:25px; border-radius:15px;
+                 text-align:center; box-shadow:0 8px 16px rgba(0,0,0,0.3);
+                 margin:16px 0;">
+         <div style="color:rgba(255,255,255,0.7); font-size:12px;
+                     letter-spacing:3px; margin-bottom:6px;">
+             {approach_name.upper()} · NEXT TRADING DAY SIGNAL
+         </div>
+         <h1 style="color:white; font-size:44px; margin:0 0 8px 0;
+                    font-weight:800; text-shadow:2px 2px 4px rgba(0,0,0,0.3);">
+             🎯 {next_date.strftime('%Y-%m-%d')} → {next_signal}
+         </h1>
+     </div>
+     """, unsafe_allow_html=True)
+
+
+ # ── Signal conviction panel ───────────────────────────────────────────────────
+
+ def show_conviction_panel(conviction: dict):
+     """
+     White-background conviction panel with Z-score gauge and per-ETF bars.
+     Uses separate st.markdown calls per ETF row to avoid Streamlit HTML escaping.
+     """
+     label = conviction["label"]
+     z_score = conviction["z_score"]
+     best_name = conviction["best_name"]
+     sorted_pairs = conviction["sorted_pairs"]
+
+     color = conviction_color(label)
+     icon = conviction_icon(label)
+
+     z_clipped = max(-3.0, min(3.0, z_score))
+     bar_pct = int((z_clipped + 3) / 6 * 100)
+
+     max_score = max(s for _, s in sorted_pairs) if sorted_pairs else 1.0
+     if max_score <= 0:
+         max_score = 1.0
+
+     # ── Header + gauge ────────────────────────────────────────────────────────
+     st.markdown(f"""
+     <div style="background:#ffffff; border:1px solid #ddd;
+                 border-left:5px solid {color}; border-radius:12px 12px 0 0;
+                 padding:18px 24px 12px 24px; margin:12px 0 0 0;
+                 box-shadow:0 2px 8px rgba(0,0,0,0.07);">
+
+         <div style="display:flex; align-items:center; gap:12px;
+                     margin-bottom:14px; flex-wrap:wrap;">
+             <span style="font-size:20px;">{icon}</span>
+             <span style="font-size:18px; font-weight:700; color:#1a1a1a;">Signal Conviction</span>
+             <span style="background:#f0f0f0; border:1px solid {color};
+                          color:{color}; font-weight:700; font-size:14px;
+                          padding:3px 12px; border-radius:8px;">
+                 Z = {z_score:.2f} &sigma;
+             </span>
+             <span style="margin-left:auto; background:{color}; color:#fff;
+                          font-weight:700; padding:4px 16px;
+                          border-radius:20px; font-size:13px;">
+                 {label}
+             </span>
+         </div>
+
+         <div style="display:flex; justify-content:space-between;
+                     font-size:11px; color:#999; margin-bottom:4px;">
+             <span>Weak &minus;3&sigma;</span>
+             <span>Neutral 0&sigma;</span>
+             <span>Strong +3&sigma;</span>
+         </div>
+         <div style="background:#f0f0f0; border-radius:8px; height:14px;
+                     overflow:hidden; position:relative; border:1px solid #e0e0e0;
+                     margin-bottom:14px;">
+             <div style="position:absolute; left:50%; top:0; width:2px;
+                         height:100%; background:#ccc;"></div>
+             <div style="width:{bar_pct}%; height:100%;
+                         background:linear-gradient(90deg,#fab1a0,{color});
+                         border-radius:8px;"></div>
+         </div>
+
+         <div style="font-size:12px; color:#999; margin-bottom:2px;">
+             Model probability by ETF (ranked high &rarr; low):
+         </div>
+     </div>
+     """, unsafe_allow_html=True)
+
+     # ── Per-ETF rows ──────────────────────────────────────────────────────────
+     for i, (name, score) in enumerate(sorted_pairs):
+         is_winner = (name == best_name)
+         is_last = (i == len(sorted_pairs) - 1)
+         bar_w = int(score / max_score * 100)
+         name_style = "font-weight:700; color:#00897b;" if is_winner else "color:#444;"
+         bar_color = color if is_winner else "#b2dfdb" if score > max_score * 0.5 else "#e0e0e0"
+         star = " ★" if is_winner else ""
+         bottom_r = "0 0 12px 12px" if is_last else "0"
+         border_bot = "border-bottom:1px solid #f0f0f0;" if not is_last else ""
+
+         st.markdown(f"""
+         <div style="background:#ffffff; border:1px solid #ddd; border-top:none;
+                     border-radius:{bottom_r}; padding:7px 24px; {border_bot}
+                     box-shadow:0 2px 8px rgba(0,0,0,0.07);">
+             <div style="display:flex; align-items:center; gap:12px;">
+                 <span style="width:44px; text-align:right; font-size:13px; {name_style}">{name}{star}</span>
+                 <div style="flex:1; background:#f5f5f5; border-radius:4px;
+                             height:14px; overflow:hidden; border:1px solid #e8e8e8;">
+                     <div style="width:{bar_w}%; height:100%;
+                                 background:{bar_color}; border-radius:4px;"></div>
+                 </div>
+                 <span style="width:56px; font-size:12px; color:#888; text-align:right;">{score:.4f}</span>
+             </div>
+         </div>
+         """, unsafe_allow_html=True)
+
+     st.caption(
+         "Z-score = std deviations the top ETF's probability sits above the mean of all ETF probabilities. "
+         "Higher → model is more decisive."
+     )
+
+
+ # ── Metrics row ───────────────────────────────────────────────────────────────
+
+ def show_metrics_row(result: dict, tbill_rate: float):
+     """Five-column metric display."""
+     col1, col2, col3, col4, col5 = st.columns(5)
+
+     col1.metric(
+         "📈 Annualised Return",
+         f"{result['ann_return']*100:.2f}%",
+         delta=f"vs T-bill: {(result['ann_return'] - tbill_rate)*100:.2f}%",
+     )
+     col2.metric(
+         "📊 Sharpe Ratio",
+         f"{result['sharpe']:.2f}",
+         delta="Risk-Adjusted" if result['sharpe'] > 1 else "Below Threshold",
+     )
+     col3.metric(
+         "🎯 Hit Ratio (15d)",
+         f"{result['hit_ratio']*100:.0f}%",
+         delta="Strong" if result['hit_ratio'] > 0.6 else "Weak",
+     )
+     col4.metric(
+         "📉 Max Drawdown",
+         f"{result['max_dd']*100:.2f}%",
+         delta="Peak to Trough",
+     )
+     col5.metric(
+         "⚠️ Max Daily DD",
+         f"{result['max_daily_dd']*100:.2f}%",
+         delta="Worst Day",
+     )
+
+
+ # ── Comparison table ──────────────────────────────────────────────────────────
+
+ def show_comparison_table(comparison_df: pd.DataFrame):
+     """Styled comparison table for all three approaches."""
+     def highlight_winner(row):
+         if "WINNER" in str(row.get("Winner", "")):
+             return ["background-color: rgba(0,200,150,0.15); font-weight:bold"] * len(row)
+         return [""] * len(row)
+
+     styled = comparison_df.style.apply(highlight_winner, axis=1).set_properties(**{
+         "text-align": "center",
+         "font-size": "14px",
+     }).set_table_styles([
+         {"selector": "th", "props": [("font-size", "14px"), ("font-weight", "bold"),
+                                      ("text-align", "center")]},
+         {"selector": "td", "props": [("padding", "10px")]},
+     ])
+     st.dataframe(styled, use_container_width=True)
+
+
+ # ── Audit trail ───────────────────────────────────────────────────────────────
+
+ def show_audit_trail(audit_trail: list):
+     """Last 20 days styled audit trail."""
+     if not audit_trail:
+         st.info("No audit trail data available.")
+         return
+
+     df = pd.DataFrame(audit_trail).tail(20)[["Date", "Signal", "Net_Return"]]
+
+     def color_return(val):
+         return "color: #00c896; font-weight:bold" if val > 0 else "color: #ff4b4b; font-weight:bold"
+
+     styled = df.style.applymap(color_return, subset=["Net_Return"]).format(
+         {"Net_Return": "{:.2%}"}
+     ).set_properties(**{
+         "font-size": "16px",
+         "text-align": "center",
+     }).set_table_styles([
+         {"selector": "th", "props": [("font-size", "16px"), ("font-weight", "bold"),
+                                      ("text-align", "center")]},
+         {"selector": "td", "props": [("padding", "10px")]},
+     ])
+     st.dataframe(styled, use_container_width=True, height=500)
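The gauge in show_conviction_panel maps a Z-score clipped to [-3, +3] linearly onto a 0-100 % bar width. A quick standalone check of that arithmetic (extracted here purely for illustration):

    def z_to_bar_pct(z: float) -> int:
        z_clipped = max(-3.0, min(3.0, z))
        return int((z_clipped + 3) / 6 * 100)

    assert z_to_bar_pct(0.0) == 50      # neutral signal sits on the midline
    assert z_to_bar_pct(3.0) == 100     # maximally decisive
    assert z_to_bar_pct(-5.0) == 0      # clipped at the weak end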
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/ui/charts.py ADDED
@@ -0,0 +1,144 @@
+ """
+ ui/charts.py
+ All Plotly chart builders for the Streamlit UI.
+ """
+
+ import numpy as np
+ import pandas as pd
+ import plotly.graph_objects as go
+
+
+ APPROACH_COLOURS = {
+     "Approach 1": "#00ffc8",
+     "Approach 2": "#7c6aff",
+     "Approach 3": "#ff6b6b",
+ }
+ BENCHMARK_COLOURS = {
+     "SPY": "#ff4b4b",
+     "AGG": "#ffa500",
+ }
+
+
+ def equity_curve_chart(
+     results: dict,
+     winner_name: str,
+     plot_dates: pd.DatetimeIndex,
+     df: pd.DataFrame,
+     test_slice: slice,
+     tbill_rate: float,
+ ) -> go.Figure:
+     """
+     Equity curve chart showing all three approaches + SPY + AGG benchmarks.
+
+     Args:
+         results     : {approach_name: result_dict}
+         winner_name : highlighted approach
+         plot_dates  : DatetimeIndex for x-axis
+         df          : full DataFrame (for benchmark columns)
+         test_slice  : slice object to extract test-period benchmark returns
+         tbill_rate  : for benchmark metric calculation
+     """
+     from strategy.backtest import compute_benchmark_metrics
+
+     fig = go.Figure()
+
+     # ── Strategy lines ────────────────────────────────────────────────────────
+     for name, res in results.items():
+         if res is None:
+             continue
+         colour = APPROACH_COLOURS.get(name, "#aaaaaa")
+         width = 3 if name == winner_name else 1.5
+         dash = "solid" if name == winner_name else "dot"
+
+         n = min(len(res["cum_returns"]), len(plot_dates))
+
+         fig.add_trace(go.Scatter(
+             x=plot_dates[:n],
+             y=res["cum_returns"][:n],
+             mode="lines",
+             name=f"{name} {'★' if name == winner_name else ''}",
+             line=dict(color=colour, width=width, dash=dash),
+             fill="tozeroy" if name == winner_name else None,
+             fillcolor=f"rgba({_hex_to_rgb(colour)},0.07)" if name == winner_name else None,
+         ))
+
+     # ── Benchmark: SPY ────────────────────────────────────────────────────────
+     if "SPY_Ret" in df.columns:
+         spy_rets = df["SPY_Ret"].iloc[test_slice].values
+         n = min(len(spy_rets), len(plot_dates))
+         spy_m = compute_benchmark_metrics(spy_rets[:n], tbill_rate)
+         fig.add_trace(go.Scatter(
+             x=plot_dates[:n],
+             y=spy_m["cum_returns"],
+             mode="lines",
+             name="SPY (Equity BM)",
+             line=dict(color=BENCHMARK_COLOURS["SPY"], width=1.5, dash="dot"),
+         ))
+
+     # ── Benchmark: AGG ────────────────────────────────────────────────────────
+     if "AGG_Ret" in df.columns:
+         agg_rets = df["AGG_Ret"].iloc[test_slice].values
+         n = min(len(agg_rets), len(plot_dates))
+         agg_m = compute_benchmark_metrics(agg_rets[:n], tbill_rate)
+         fig.add_trace(go.Scatter(
+             x=plot_dates[:n],
+             y=agg_m["cum_returns"],
+             mode="lines",
+             name="AGG (Bond BM)",
+             line=dict(color=BENCHMARK_COLOURS["AGG"], width=1.5, dash="dot"),
+         ))
+
+     fig.update_layout(
+         template="plotly_dark",
+         height=460,
+         hovermode="x unified",
+         showlegend=True,
+         legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01, font=dict(size=11)),
+         xaxis_title="Date",
+         yaxis_title="Cumulative Return (×)",
+         margin=dict(l=50, r=30, t=20, b=50),
+     )
+     return fig
+
+
+ def comparison_bar_chart(results: dict, winner_name: str) -> go.Figure:
+     """
+     Horizontal bar chart comparing annualised returns across all three approaches.
+     """
+     names = []
+     returns = []
+     colours = []
+
+     for name, res in results.items():
+         if res is None:
+             continue
+         names.append(name)
+         returns.append(res["ann_return"] * 100)
+         colours.append(APPROACH_COLOURS.get(name, "#aaaaaa"))
+
+     fig = go.Figure(go.Bar(
+         x=returns,
+         y=names,
+         orientation="h",
+         marker_color=colours,
+         text=[f"{r:.1f}%" for r in returns],
+         textposition="auto",
+     ))
+
+     fig.update_layout(
+         template="plotly_dark",
+         height=200,
+         xaxis_title="Annualised Return (%)",
+         margin=dict(l=100, r=30, t=10, b=40),
+         showlegend=False,
+     )
+     return fig
+
+
+ # ── Helper ────────────────────────────────────────────────────────────────────
+
+ def _hex_to_rgb(hex_color: str) -> str:
+     """Convert #rrggbb to 'r,g,b' string for rgba()."""
+     h = hex_color.lstrip("#")
+     r, g, b = int(h[0:2], 16), int(h[2:4], 16), int(h[4:6], 16)
+     return f"{r},{g},{b}"
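A quick check of the helper and the translucent fill string equity_curve_chart builds from it (colour taken from APPROACH_COLOURS above):

    assert _hex_to_rgb("#00ffc8") == "0,255,200"
    fill = f"rgba({_hex_to_rgb('#00ffc8')},0.07)"
    print(fill)   # rgba(0,255,200,0.07)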
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/utils/calendar.py ADDED
@@ -0,0 +1,91 @@
+ """
+ utils/calendar.py
+ NYSE calendar utilities:
+ - Next trading day for signal display
+ - Market open check
+ - EST time helper
+ """
+
+ from datetime import date, datetime, timedelta
+ import pytz
+
+ try:
+     import pandas_market_calendars as mcal
+     NYSE_CAL_AVAILABLE = True
+ except ImportError:
+     NYSE_CAL_AVAILABLE = False
+
+
+ def get_est_time() -> datetime:
+     """Return current datetime in US/Eastern timezone."""
+     return datetime.now(pytz.timezone("US/Eastern"))
+
+
+ def is_market_open_today() -> bool:
+     """Return True if today is a NYSE trading day."""
+     today = get_est_time().date()
+     if NYSE_CAL_AVAILABLE:
+         try:
+             nyse = mcal.get_calendar("NYSE")
+             schedule = nyse.schedule(start_date=today, end_date=today)
+             return len(schedule) > 0
+         except Exception:
+             pass
+     return today.weekday() < 5
+
+
+ def get_next_signal_date() -> date:
+     """
+     Determine the date for which the model's signal applies.
+
+     Rules:
+     - If today is a NYSE trading day AND it is before 09:30 EST
+       → signal applies to TODAY (market hasn't opened yet)
+     - Otherwise
+       → signal applies to the NEXT NYSE trading day
+     """
+     now_est = get_est_time()
+     today = now_est.date()
+
+     market_not_open_yet = (
+         now_est.hour < 9 or
+         (now_est.hour == 9 and now_est.minute < 30)
+     )
+
+     if NYSE_CAL_AVAILABLE:
+         try:
+             nyse = mcal.get_calendar("NYSE")
+             schedule = nyse.schedule(
+                 start_date=today,
+                 end_date=today + timedelta(days=10),
+             )
+             if len(schedule) == 0:
+                 return today  # fallback
+
+             first_day = schedule.index[0].date()
+
+             # Today is a trading day and market hasn't opened → today
+             if first_day == today and market_not_open_yet:
+                 return today
+
+             # Otherwise find first trading day strictly after today
+             for ts in schedule.index:
+                 d = ts.date()
+                 if d > today:
+                     return d
+
+             return schedule.index[-1].date()
+         except Exception:
+             pass
+
+     # Fallback: simple weekend skip
+     candidate = today if market_not_open_yet else today + timedelta(days=1)
+     while candidate.weekday() >= 5:
+         candidate += timedelta(days=1)
+     return candidate
+
+
+ def is_sync_window() -> bool:
+     """True if current EST time is in the 07:00-08:00 or 19:00-20:00 window."""
+     now = get_est_time()
+     return (7 <= now.hour < 8) or (19 <= now.hour < 20)
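The pure-Python weekend-skip fallback is worth a sanity check; this sketch reimplements just that branch so it runs without pandas_market_calendars (dates chosen arbitrarily; 2024-01-05 is a Friday):

    from datetime import date, timedelta

    def next_weekday(today: date, market_not_open_yet: bool) -> date:
        candidate = today if market_not_open_yet else today + timedelta(days=1)
        while candidate.weekday() >= 5:   # 5 = Saturday, 6 = Sunday
            candidate += timedelta(days=1)
        return candidate

    assert next_weekday(date(2024, 1, 5), False) == date(2024, 1, 8)  # after the open → Monday
    assert next_weekday(date(2024, 1, 5), True) == date(2024, 1, 5)   # before 09:30 EST → same day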
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/__init__.py ADDED
@@ -0,0 +1 @@
+
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/signals/__init__.py ADDED
@@ -0,0 +1 @@
+
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/strategy/__init__.py ADDED
@@ -0,0 +1 @@
+
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/ui/__init__.py ADDED
@@ -0,0 +1 @@
+
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/utils/__init__.py ADDED
@@ -0,0 +1 @@
+
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/models/__init__.py ADDED
@@ -0,0 +1 @@
+ # models package
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/signals/__init__.py CHANGED
@@ -1 +1 @@
-
+ # strategy package
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/strategy/__init__.py CHANGED
@@ -1 +1 @@
-
+ # strategy package
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/signals/__init__.py CHANGED
@@ -1 +1 @@
- # strategy package
+ # signals package
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/ui/__init__.py CHANGED
@@ -1 +1 @@
-
+ # ui package
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/utils/__init__.py CHANGED
@@ -1 +1 @@
-
+ # utils package
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach2_regime.py CHANGED
@@ -1 +1,217 @@
+ """
+ models/approach2_regime.py
+ Approach 2: Regime-Conditioned CNN-LSTM
 
+ Pipeline:
+     Raw macro signals
+     -> CNN Tower (64 filters, k=3) -> feature vector
+     -> Regime Classifier (HMM on VIX + HY spread + T10Y2Y) -> one-hot [4]
+     -> Concatenate CNN features + regime embedding
+     -> LSTM (128 units)
+     -> Dense 64 -> Softmax (n_etfs + 1 CASH)
+
+ NOTE: tensorflow and hmmlearn are imported lazily inside functions
+ to prevent module-level import failures from making this module
+ appear broken to Python's import system.
+ """
+
+ import numpy as np
+
+ N_REGIMES = 4
+ REGIME_HINTS = ["VIX", "HY", "Spread", "T10Y2Y", "T10Y3M", "Credit"]
+
+
+ # ---------------------------------------------------------------------------
+ # Regime detection helpers
+ # ---------------------------------------------------------------------------
+
+ def _get_regime_cols(feature_names: list) -> list:
+     return [
+         f for f in feature_names
+         if any(hint.lower() in f.lower() for hint in REGIME_HINTS)
+     ]
+
+
+ def fit_regime_model(X_flat: np.ndarray, feature_names: list,
+                      n_regimes: int = N_REGIMES):
+     """
+     Fit a Gaussian HMM on regime-relevant macro features.
+     Returns (hmm_model, regime_cols_idx).
+     hmm_model is None if hmmlearn is unavailable or fitting fails.
+     """
+     regime_col_names = _get_regime_cols(feature_names)
+     if not regime_col_names:
+         regime_col_names = feature_names[:min(3, len(feature_names))]
+
+     regime_cols_idx = [
+         feature_names.index(c) for c in regime_col_names
+         if c in feature_names
+     ]
+     X_regime = X_flat[:, regime_cols_idx]
+
+     try:
+         from hmmlearn.hmm import GaussianHMM
+         hmm = GaussianHMM(
+             n_components=n_regimes,
+             covariance_type="diag",
+             n_iter=100,
+             random_state=42,
+         )
+         hmm.fit(X_regime)
+         return hmm, regime_cols_idx
+     except Exception as e:
+         print(f"[Approach 2] HMM fitting failed: {e}. Using fallback.")
+         return None, regime_cols_idx
+
+
+ def predict_regimes(hmm_model, X_flat: np.ndarray,
+                     regime_cols_idx: list,
+                     n_regimes: int = N_REGIMES) -> np.ndarray:
+     """Predict integer regime label for each day."""
+     X_regime = X_flat[:, regime_cols_idx]
+
+     if hmm_model is not None:
+         try:
+             return hmm_model.predict(X_regime)
+         except Exception:
+             pass
+
+     # Fallback: quantile binning on first regime feature
+     feat = X_regime[:, 0]
+     quantiles = np.percentile(feat, np.linspace(0, 100, n_regimes + 1))
+     return np.digitize(feat, quantiles[1:-1]).astype(int)
+
+
+ def regimes_to_onehot(regimes: np.ndarray,
+                       n_regimes: int = N_REGIMES) -> np.ndarray:
+     one_hot = np.zeros((len(regimes), n_regimes), dtype=np.float32)
+     for i, r in enumerate(regimes):
+         one_hot[i, min(int(r), n_regimes - 1)] = 1.0
+     return one_hot
+
+
+ def build_regime_sequences(X_seq: np.ndarray,
+                            regimes_flat: np.ndarray,
+                            lookback: int) -> np.ndarray:
+     n_samples = X_seq.shape[0]
+     aligned = regimes_flat[lookback: lookback + n_samples]
+     return regimes_to_onehot(aligned)
+
+
+ # ---------------------------------------------------------------------------
+ # Model builder
+ # ---------------------------------------------------------------------------
+
+ def build_regime_cnn_lstm(seq_input_shape: tuple,
+                           n_classes: int,
+                           n_regimes: int = N_REGIMES,
+                           dropout: float = 0.3,
+                           lstm_units: int = 128):
+     """Build and compile the regime-conditioned CNN-LSTM model."""
+     from tensorflow import keras
+     from models.base import classification_head
+
+     seq_input = keras.Input(shape=seq_input_shape, name="seq_input")
+     x = keras.layers.Conv1D(64, kernel_size=3, padding="causal",
+                             activation="relu")(seq_input)
+     x = keras.layers.BatchNormalization()(x)
+     x = keras.layers.MaxPooling1D(pool_size=2)(x)
+     x = keras.layers.Conv1D(32, kernel_size=3, padding="causal",
+                             activation="relu")(x)
+     x = keras.layers.BatchNormalization()(x)
+     x = keras.layers.Dropout(dropout)(x)
+     cnn_out = keras.layers.GlobalAveragePooling1D()(x)
+
+     regime_input = keras.Input(shape=(n_regimes,), name="regime_input")
+     regime_emb = keras.layers.Dense(8, activation="relu")(regime_input)
+
+     merged = keras.layers.Concatenate()([cnn_out, regime_emb])
+     x = keras.layers.Reshape((1, merged.shape[-1]))(merged)
+     x = keras.layers.LSTM(lstm_units, dropout=dropout)(x)
+
+     outputs = classification_head(x, n_classes, dropout)
+
+     model = keras.Model(
+         inputs=[seq_input, regime_input],
+         outputs=outputs,
+         name="Approach2_Regime_CNN_LSTM",
+     )
+     model.compile(
+         optimizer=keras.optimizers.Adam(learning_rate=1e-3),
+         loss="sparse_categorical_crossentropy",
+         metrics=["accuracy"],
+     )
+     return model
+
+
+ # ---------------------------------------------------------------------------
+ # Training pipeline
+ # ---------------------------------------------------------------------------
+
+ def train_approach2(
+     X_train, y_train,
+     X_val, y_val,
+     X_flat_all: np.ndarray,
+     feature_names: list,
+     lookback: int,
+     train_size: int,
+     val_size: int,
+     n_classes: int,
+     epochs: int = 100,
+     batch_size: int = 32,
+     dropout: float = 0.3,
+     lstm_units: int = 128,
+ ):
+     """
+     Fit HMM regime model then train the regime-conditioned CNN-LSTM.
+     Returns: model, history, hmm_model, regime_cols_idx
+     """
+     from models.base import get_callbacks
+
+     X_flat_train = X_flat_all[:train_size + lookback]
+     hmm_model, regime_cols_idx = fit_regime_model(X_flat_train, feature_names)
+
+     regimes_all = predict_regimes(hmm_model, X_flat_all, regime_cols_idx)
+
+     R_train = build_regime_sequences(X_train, regimes_all, lookback)
+     R_val = build_regime_sequences(X_val, regimes_all, lookback + train_size)
+
+     model = build_regime_cnn_lstm(
+         X_train.shape[1:], n_classes,
+         dropout=dropout, lstm_units=lstm_units,
+     )
+
+     history = model.fit(
+         [X_train, R_train], y_train,
+         validation_data=([X_val, R_val], y_val),
+         epochs=epochs,
+         batch_size=batch_size,
+         callbacks=get_callbacks(),
+         verbose=0,
+     )
+
+     return model, history, hmm_model, regime_cols_idx
+
+
+ # ---------------------------------------------------------------------------
+ # Inference
+ # ---------------------------------------------------------------------------
+
+ def predict_approach2(
+     model,
+     X_test: np.ndarray,
+     X_flat_all: np.ndarray,
+     regime_cols_idx: list,
+     hmm_model,
+     lookback: int,
+     train_size: int,
+     val_size: int,
+ ) -> tuple:
+     """Predict on test set with regime conditioning. Returns (preds, proba)."""
+     regimes_all = predict_regimes(hmm_model, X_flat_all, regime_cols_idx)
+     offset = lookback + train_size + val_size
+     R_test = build_regime_sequences(X_test, regimes_all, offset)
+
+     proba = model.predict([X_test, R_test], verbose=0)
+     preds = np.argmax(proba, axis=1)
+     return preds, proba
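When hmmlearn is unavailable, predict_regimes falls back to quantile binning on the first regime column. A small sketch of that path plus the one-hot encoding (the VIX-like series is synthetic):

    import numpy as np

    vix_like = np.array([12.0, 14.0, 18.0, 25.0, 40.0, 33.0, 16.0, 13.0])
    X_flat = vix_like.reshape(-1, 1)

    regimes = predict_regimes(None, X_flat, regime_cols_idx=[0])  # hmm_model=None → fallback
    onehot = regimes_to_onehot(regimes)
    print(regimes)         # one integer regime per day, 0 (calm) … 3 (stressed)
    print(onehot.shape)    # (8, 4)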
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/base.py CHANGED
@@ -1,18 +1,16 @@
  """
  models/base.py
- Shared utilities for all three CNN-LSTM variants:
- - Data preparation (sequences, train/val/test split)
- - Common Keras layers / callbacks
- - Predict + evaluate helpers
+ Shared utilities for all three CNN-LSTM variants.
+ Key fix: class_weight support to prevent majority-class collapse.
  """
 
  import numpy as np
  import pandas as pd
  from sklearn.preprocessing import RobustScaler
+ from sklearn.utils.class_weight import compute_class_weight
  import tensorflow as tf
  from tensorflow import keras
 
- # ── Reproducibility ───────────────────────────────────────────────────────────
  SEED = 42
  tf.random.set_seed(SEED)
  np.random.seed(SEED)
@@ -23,15 +21,7 @@ np.random.seed(SEED)
  def build_sequences(features: np.ndarray, targets: np.ndarray, lookback: int):
      """
      Build supervised sequences for CNN-LSTM input.
-
-     Args:
-         features : 2-D array [n_days, n_features]
-         targets  : 2-D array [n_days, n_etfs] (raw returns)
-         lookback : number of past days per sample
-
-     Returns:
-         X : [n_samples, lookback, n_features]
-         y : [n_samples, n_etfs] (raw returns for the next day)
+     X[i] = features[i : i+lookback] → predicts y[i+lookback]
      """
      X, y = [], []
      for i in range(lookback, len(features)):
@@ -43,11 +33,9 @@ def build_sequences(features: np.ndarray, targets: np.ndarray, lookback: int):
  # ── Train / val / test split ──────────────────────────────────────────────────
 
  def train_val_test_split(X, y, train_pct=0.70, val_pct=0.15):
-     """Split sequences into train / val / test preserving temporal order."""
-     n = len(X)
+     n = len(X)
      t1 = int(n * train_pct)
      t2 = int(n * (train_pct + val_pct))
-
      return (
          X[:t1], y[:t1],
          X[t1:t2], y[t1:t2],
@@ -58,56 +46,66 @@ def train_val_test_split(X, y, train_pct=0.70, val_pct=0.15):
  # ── Feature scaling ───────────────────────────────────────────────────────────
 
  def scale_features(X_train, X_val, X_test):
-     """
-     Fit RobustScaler on training data only, apply to val and test.
-     Operates on the flattened feature dimension.
-
-     Returns scaled arrays with same shape as inputs.
-     """
-     n_train, lb, n_feat = X_train.shape
-     scaler = RobustScaler()
-
-     # Fit on train
+     n_feat = X_train.shape[2]
+     scaler = RobustScaler()
      scaler.fit(X_train.reshape(-1, n_feat))
 
-     def _transform(X):
-         shape = X.shape
-         return scaler.transform(X.reshape(-1, n_feat)).reshape(shape)
+     def _t(X):
+         s = X.shape
+         return scaler.transform(X.reshape(-1, n_feat)).reshape(s)
 
-     return _transform(X_train), _transform(X_val), _transform(X_test), scaler
+     return _t(X_train), _t(X_val), _t(X_test), scaler
 
 
- # ── Label builder (classification: argmax of returns) ────────────────────────
+ # ── Label builder ─────────────────────────────────────────────────────────────
 
  def returns_to_labels(y_raw, include_cash=True, cash_threshold=0.0):
      """
-     Convert raw return matrix to integer class labels.
-
-     If include_cash=True, adds a CASH class (index = n_etfs) when
-     the best ETF return is below cash_threshold.
-
-     Args:
-         y_raw          : [n_samples, n_etfs]
-         include_cash   : whether to allow CASH class
-         cash_threshold : minimum ETF return to prefer over CASH
-
-     Returns:
-         labels : [n_samples] integer class indices
+     Assign label = argmax(returns).
+     If include_cash and best return < cash_threshold → label = n_etfs (CASH).
      """
      best = np.argmax(y_raw, axis=1)
      if include_cash:
-         best_return = y_raw[np.arange(len(y_raw)), best]
-         cash_idx = y_raw.shape[1]
-         labels = np.where(best_return < cash_threshold, cash_idx, best)
+         best_ret = y_raw[np.arange(len(y_raw)), best]
+         cash_idx = y_raw.shape[1]
+         labels = np.where(best_ret < cash_threshold, cash_idx, best)
      else:
          labels = best
      return labels.astype(np.int32)
 
 
- # ── Common Keras callbacks ────────────────────────────────────────────────────
+ # ── Class weights ─────────────────────────────────────────────────────────────
+
+ def compute_class_weights(y_labels: np.ndarray, n_classes: int) -> dict:
+     """
+     Compute balanced class weights to counteract majority-class collapse.
+     Returns dict {class_index: weight} for use in model.fit().
+     """
+     classes = np.arange(n_classes)
+     present = np.unique(y_labels)
+
+     try:
+         weights = compute_class_weight(
+             class_weight="balanced",
+             classes=present,
+             y=y_labels,
+         )
+         weight_dict = {int(c): float(w) for c, w in zip(present, weights)}
+     except Exception:
+         weight_dict = {}
+
+     # Fill any missing classes with weight 1.0
+     for c in classes:
+         if c not in weight_dict:
+             weight_dict[c] = 1.0
+
+     return weight_dict
+
+
+ # ── Callbacks ─────────────────────────────────────────────────────────────────
 
- def get_callbacks(patience_es=15, patience_lr=8, min_lr=1e-6):
-     """Standard early stopping + reduce-LR callbacks shared by all models."""
+ def get_callbacks(patience_es=20, patience_lr=10, min_lr=1e-6):
+     """Longer patience to allow models time to learn past majority class."""
      return [
          keras.callbacks.EarlyStopping(
              monitor="val_loss",
@@ -125,75 +123,51 @@ def get_callbacks(patience_es=15, patience_lr=8, min_lr=1e-6):
      ]
 
 
- # ── Common output head ────────────────────────────────────────────────────────
+ # ── Output head ───────────────────────────────────────────────────────────────
 
  def classification_head(x, n_classes: int, dropout: float = 0.3):
-     """
-     Shared dense output head for all three CNN-LSTM variants.
-
-     Args:
-         x         : input tensor
-         n_classes : number of ETF classes (+ 1 for CASH if applicable)
-         dropout   : dropout rate
-
-     Returns:
-         output tensor with softmax activation
-     """
      x = keras.layers.Dense(64, activation="relu")(x)
+     x = keras.layers.BatchNormalization()(x)
      x = keras.layers.Dropout(dropout)(x)
+     x = keras.layers.Dense(32, activation="relu")(x)
+     x = keras.layers.Dropout(dropout / 2)(x)
      x = keras.layers.Dense(n_classes, activation="softmax")(x)
      return x
 
 
- # ── Prediction helper ─────────────────────────────────────────────────────────
+ # ── Prediction ────────────────────────────────────────────────────────────────
 
- def predict_classes(model, X_test: np.ndarray) -> np.ndarray:
-     """Return integer class predictions from a Keras model."""
+ def predict_classes(model, X_test: np.ndarray) -> tuple:
      proba = model.predict(X_test, verbose=0)
      return np.argmax(proba, axis=1), proba
 
 
- # ── Metrics helper ────────────────────────────────────────────────────────────
+ # ── Metrics ───────────────────────────────────────────────────────────────────
 
  def evaluate_returns(
-     preds: np.ndarray,
-     proba: np.ndarray,
-     y_raw_test: np.ndarray,
-     target_etfs: list,
-     tbill_rate: float,
-     fee_bps: int,
-     include_cash: bool = True,
+     preds, proba, y_raw_test, target_etfs, tbill_rate, fee_bps, include_cash=True,
  ):
-     """
-     Given integer class predictions and raw return matrix,
-     compute strategy returns and summary metrics.
-
-     Returns:
-         strat_rets  : np.ndarray of daily net returns
-         ann_return  : annualised return (float)
-         cum_returns : cumulative return series
-         last_proba  : probability vector for the last prediction
-         next_etf    : name of ETF predicted for next session
-     """
      n_etfs = len(target_etfs)
-     strat_rets = []
+     daily_tbill = tbill_rate / 252
+     strat_rets = []
 
      for i, cls in enumerate(preds):
          if include_cash and cls == n_etfs:
-             # CASH: earn daily T-bill rate
-             daily_tbill = tbill_rate / 252
-             net = daily_tbill - (fee_bps / 10000)
+             net = daily_tbill - fee_bps / 10000
          else:
-             ret = y_raw_test[i][cls]
-             net = ret - (fee_bps / 10000)
+             cls = min(int(cls), n_etfs - 1)
+             net = float(y_raw_test[i][cls]) - fee_bps / 10000
          strat_rets.append(net)
 
      strat_rets = np.array(strat_rets)
      cum_returns = np.cumprod(1 + strat_rets)
-     ann_return = (cum_returns[-1] ** (252 / len(strat_rets))) - 1
+     ann_return = cum_returns[-1] ** (252 / len(strat_rets)) - 1
 
      last_proba = proba[-1]
      next_cls = int(np.argmax(last_proba))
-     next_etf = "CASH" if (include_cash and next_cls == n_etfs) else target_etfs[next_cls].replace("_Ret", "")
+     next_etf = (
+         "CASH" if (include_cash and next_cls == n_etfs)
+         else target_etfs[min(next_cls, n_etfs - 1)].replace("_Ret", "")
+     )
 
      return strat_rets, ann_return, cum_returns, last_proba, next_etf
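The "balanced" heuristic behind compute_class_weights gives weight_c = n_samples / (n_present_classes × count_c), so rare classes get proportionally larger gradients. A worked example with an invented label vector:

    import numpy as np

    y = np.array([0, 0, 0, 0, 0, 0, 1, 1, 2])   # heavy class-0 majority
    cw = compute_class_weights(y, n_classes=4)

    # 9 / (3 × 6) = 0.5,  9 / (3 × 2) = 1.5,  9 / (3 × 1) = 3.0;
    # absent class 3 is filled with the default 1.0
    print(cw)   # {0: 0.5, 1: 1.5, 2: 3.0, 3: 1.0}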
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach1_wavelet.py CHANGED
@@ -1,167 +1,86 @@
  """
  models/approach1_wavelet.py
  Approach 1: Wavelet Decomposition CNN-LSTM
-
- Pipeline:
-     Raw macro signals
-     → DWT (db4, level=3) per signal → multi-band channel stack
-     → 1D CNN (64 filters, k=3) → MaxPool → (32 filters, k=3)
-     → LSTM (128 units)
-     → Dense 64 → Softmax (n_etfs + 1 CASH)
+ With class weights to prevent majority-class collapse.
  """
 
  import numpy as np
  import pywt
- import tensorflow as tf
- from tensorflow import keras
- from models.base import classification_head, get_callbacks
-
- WAVELET = "db4"
- LEVEL = 3
 
+ WAVELET = "db4"
+ LEVEL = 3
 
- # ── Wavelet feature engineering ───────────────────────────────────────────────
-
  def _wavelet_decompose_signal(signal: np.ndarray, wavelet: str, level: int) -> np.ndarray:
-     """
-     Decompose a 1-D signal into DWT subbands and return them stacked.
-
-     For a signal of length T:
-         coeffs = [cA_n, cD_n, cD_{n-1}, ..., cD_1]
-     We interpolate each subband back to length T so we can stack them.
-
-     Returns: array of shape [T, level+1]
-     """
      T = len(signal)
      coeffs = pywt.wavedec(signal, wavelet, level=level)
      bands = []
      for c in coeffs:
-         # Interpolate back to original length
-         band = np.interp(
-             np.linspace(0, len(c) - 1, T),
-             np.arange(len(c)),
-             c,
-         )
+         band = np.interp(np.linspace(0, len(c)-1, T), np.arange(len(c)), c)
          bands.append(band)
-     return np.stack(bands, axis=-1)  # [T, level+1]
+     return np.stack(bands, axis=-1)
 
 
- def apply_wavelet_transform(X: np.ndarray, wavelet: str = WAVELET, level: int = LEVEL) -> np.ndarray:
-     """
-     Apply DWT to every feature channel across all samples.
-
-     Args:
-         X : [n_samples, lookback, n_features]
-
-     Returns:
-         X_wt : [n_samples, lookback, n_features * (level+1)]
-     """
+ def apply_wavelet_transform(X: np.ndarray, wavelet=WAVELET, level=LEVEL) -> np.ndarray:
      n_samples, lookback, n_features = X.shape
      n_bands = level + 1
      X_wt = np.zeros((n_samples, lookback, n_features * n_bands), dtype=np.float32)
-
      for s in range(n_samples):
          for f in range(n_features):
-             decomposed = _wavelet_decompose_signal(X[s, :, f], wavelet, level)  # [T, n_bands]
+             decomposed = _wavelet_decompose_signal(X[s, :, f], wavelet, level)
              start = f * n_bands
              X_wt[s, :, start: start + n_bands] = decomposed
-
      return X_wt
 
 
- # ── Model builder ─────────────────────────────────────────────────────────────
-
- def build_wavelet_cnn_lstm(
-     input_shape: tuple,
-     n_classes: int,
-     dropout: float = 0.3,
-     lstm_units: int = 128,
- ) -> keras.Model:
-     """
-     Build Wavelet CNN-LSTM model.
-
-     Args:
-         input_shape : (lookback, n_features * n_bands) — post-DWT shape
-         n_classes   : number of output classes (ETFs + CASH)
-         dropout     : dropout rate
-         lstm_units  : LSTM hidden size
-
-     Returns:
-         Compiled Keras model
-     """
-     inputs = keras.Input(shape=input_shape, name="wavelet_input")
-
-     # CNN block 1
-     x = keras.layers.Conv1D(64, kernel_size=3, padding="causal", activation="relu")(inputs)
+ def build_wavelet_cnn_lstm(input_shape, n_classes, dropout=0.3, lstm_units=128):
+     from tensorflow import keras
+     from models.base import classification_head
+
+     inputs = keras.Input(shape=input_shape)
+     x = keras.layers.Conv1D(64, 3, padding="causal", activation="relu")(inputs)
      x = keras.layers.BatchNormalization()(x)
-     x = keras.layers.MaxPooling1D(pool_size=2)(x)
-
-     # CNN block 2
-     x = keras.layers.Conv1D(32, kernel_size=3, padding="causal", activation="relu")(x)
+     x = keras.layers.MaxPooling1D(2)(x)
+     x = keras.layers.Conv1D(32, 3, padding="causal", activation="relu")(x)
      x = keras.layers.BatchNormalization()(x)
      x = keras.layers.Dropout(dropout)(x)
-
-     # LSTM
      x = keras.layers.LSTM(lstm_units, dropout=dropout, recurrent_dropout=0.1)(x)
-
-     # Output head
      outputs = classification_head(x, n_classes, dropout)
 
-     model = keras.Model(inputs, outputs, name="Approach1_Wavelet_CNN_LSTM")
+     model = keras.Model(inputs, outputs, name="Approach1_Wavelet")
      model.compile(
-         optimizer=keras.optimizers.Adam(learning_rate=1e-3),
+         optimizer=keras.optimizers.Adam(1e-3),
          loss="sparse_categorical_crossentropy",
          metrics=["accuracy"],
      )
      return model
 
 
- # ── Full train pipeline ───────────────────────────────────────────────────────
-
  def train_approach1(
-     X_train, y_train,
-     X_val, y_val,
-     n_classes: int,
-     epochs: int = 100,
-     batch_size: int = 32,
-     dropout: float = 0.3,
-     lstm_units: int = 128,
+     X_train, y_train, X_val, y_val,
+     n_classes, epochs=100, batch_size=32, dropout=0.3, lstm_units=128,
  ):
-     """
-     Apply wavelet transform then train the CNN-LSTM.
-
-     Args:
-         X_train/val : [n, lookback, n_features] (scaled, pre-wavelet)
-         y_train/val : [n] integer class labels
-         n_classes   : total output classes
-
-     Returns:
-         model    : trained Keras model
-         history  : training history
-         wt_shape : post-DWT input shape (for inference)
-     """
-     # Apply DWT
-     X_train_wt = apply_wavelet_transform(X_train)
-     X_val_wt = apply_wavelet_transform(X_val)
-
-     input_shape = X_train_wt.shape[1:]  # (lookback, n_features * n_bands)
+     from models.base import get_callbacks, compute_class_weights
+
+     X_train_wt = apply_wavelet_transform(X_train)
+     X_val_wt = apply_wavelet_transform(X_val)
+     input_shape = X_train_wt.shape[1:]
      model = build_wavelet_cnn_lstm(input_shape, n_classes, dropout, lstm_units)
+     cw = compute_class_weights(y_train, n_classes)
 
      history = model.fit(
          X_train_wt, y_train,
          validation_data=(X_val_wt, y_val),
          epochs=epochs,
          batch_size=batch_size,
+         class_weight=cw,
          callbacks=get_callbacks(),
          verbose=0,
      )
-
      return model, history, input_shape
 
 
  def predict_approach1(model, X_test: np.ndarray) -> tuple:
-     """Apply DWT to test set then predict. Returns (class_preds, proba)."""
      X_test_wt = apply_wavelet_transform(X_test)
      proba = model.predict(X_test_wt, verbose=0)
-     preds = np.argmax(proba, axis=1)
-     return preds, proba
+     return np.argmax(proba, axis=1), proba
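The wavelet transform multiplies the channel dimension by level + 1, since each window is decomposed per feature and every subband is interpolated back to the window length. A shape check against apply_wavelet_transform as defined above (random input; db4 at level 3 needs a window of roughly 60+ days to avoid pywt boundary warnings):

    import numpy as np

    X = np.random.default_rng(0).normal(size=(4, 60, 5))   # 4 samples, 60-day lookback, 5 features
    X_wt = apply_wavelet_transform(X)                      # db4, level=3 → 4 bands per feature
    print(X_wt.shape)                                      # (4, 60, 20) = 5 features × 4 bands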
 
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach2_regime.py CHANGED
@@ -166,7 +166,7 @@ def train_approach2(
      Fit HMM regime model then train the regime-conditioned CNN-LSTM.
      Returns: model, history, hmm_model, regime_cols_idx
      """
-     from models.base import get_callbacks
+     from models.base import get_callbacks, compute_class_weights
 
      X_flat_train = X_flat_all[:train_size + lookback]
      hmm_model, regime_cols_idx = fit_regime_model(X_flat_train, feature_names)
@@ -181,11 +181,14 @@ def train_approach2(
          dropout=dropout, lstm_units=lstm_units,
      )
 
+     cw = compute_class_weights(y_train, n_classes)
+
      history = model.fit(
          [X_train, R_train], y_train,
          validation_data=([X_val, R_val], y_val),
          epochs=epochs,
          batch_size=batch_size,
+         class_weight=cw,
          callbacks=get_callbacks(),
          verbose=0,
      )
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach3_multiscale.py CHANGED
@@ -1,150 +1,80 @@
  """
  models/approach3_multiscale.py
  Approach 3: Multi-Scale Parallel CNN-LSTM
-
- Pipeline:
-     Raw macro signals
-     → 3 parallel CNN towers: kernel 3 (short), 7 (medium), 21 (long)
-     → Concatenate [96 features]
-     → LSTM (128 units)
-     → Dense 64 → Softmax (n_etfs + 1 CASH)
+ With class weights to prevent majority-class collapse.
  """
 
  import numpy as np
- import tensorflow as tf
- from tensorflow import keras
- from models.base import classification_head, get_callbacks
-
- # Kernel sizes represent: momentum (3d), weekly cycle (7d), monthly trend (21d)
- KERNEL_SIZES = [3, 7, 21]
- FILTERS_EACH = 32  # 32 × 3 towers = 96 concatenated features
 
+ KERNEL_SIZES = [3, 7, 21]
+ FILTERS_EACH = 32
 
- # ── Model builder ─────────────────────────────────────────────────────────────
 
  def build_multiscale_cnn_lstm(
-     input_shape: tuple,
-     n_classes: int,
-     kernel_sizes: list = None,
-     filters: int = FILTERS_EACH,
-     dropout: float = 0.3,
-     lstm_units: int = 128,
- ) -> keras.Model:
-     """
-     Multi-scale parallel CNN-LSTM.
-
-     Three CNN towers with different kernel sizes run in parallel on the
-     same input, capturing momentum, weekly cycle, and monthly trend
-     simultaneously. Their outputs are concatenated before the LSTM.
-
-     Args:
-         input_shape  : (lookback, n_features)
-         n_classes    : number of output classes (ETFs + CASH)
-         kernel_sizes : list of kernel sizes for each tower
-         filters      : number of Conv1D filters per tower
-         dropout      : dropout rate
-         lstm_units   : LSTM hidden size
-
-     Returns:
-         Compiled Keras model
-     """
+     input_shape, n_classes, kernel_sizes=None,
+     filters=FILTERS_EACH, dropout=0.3, lstm_units=128,
+ ):
+     from tensorflow import keras
+     from models.base import classification_head
+
      if kernel_sizes is None:
          kernel_sizes = KERNEL_SIZES
 
-     inputs = keras.Input(shape=input_shape, name="multiscale_input")
 
-     towers = []
      for k in kernel_sizes:
-         # Each tower: Conv → BN → Conv → BN → GlobalAvgPool
-         t = keras.layers.Conv1D(
-             filters, kernel_size=k, padding="causal", activation="relu",
-             name=f"conv1_k{k}"
-         )(inputs)
          t = keras.layers.BatchNormalization(name=f"bn1_k{k}")(t)
-         t = keras.layers.Conv1D(
-             filters, kernel_size=k, padding="causal", activation="relu",
-             name=f"conv2_k{k}"
-         )(t)
          t = keras.layers.BatchNormalization(name=f"bn2_k{k}")(t)
          t = keras.layers.Dropout(dropout, name=f"drop_k{k}")(t)
          towers.append(t)
 
-     # Concatenating along the feature dimension keeps the temporal axis intact for the LSTM
-     if len(towers) > 1:
-         merged = keras.layers.Concatenate(axis=-1, name="tower_concat")(towers)
-     else:
-         merged = towers[0]
-
-     # LSTM integrates multi-scale temporal features
-     x = keras.layers.LSTM(lstm_units, dropout=dropout, recurrent_dropout=0.1, name="lstm")(merged)
-
-     # Output head
      outputs = classification_head(x, n_classes, dropout)
 
-     model = keras.Model(inputs, outputs, name="Approach3_MultiScale_CNN_LSTM")
      model.compile(
-         optimizer=keras.optimizers.Adam(learning_rate=1e-3),
          loss="sparse_categorical_crossentropy",
          metrics=["accuracy"],
      )
      return model
 
 
- # ── Full train pipeline ───────────────────────────────────────────────────────
-
  def train_approach3(
-     X_train, y_train,
-     X_val, y_val,
-     n_classes: int,
-     epochs: int = 100,
-     batch_size: int = 32,
-     dropout: float = 0.3,
-     lstm_units: int = 128,
-     kernel_sizes: list = None,
  ):
-     """
-     Build and train the multi-scale CNN-LSTM.
-
-     Args:
-         X_train/val : [n, lookback, n_features]
-         y_train/val : [n] integer class labels
-         n_classes   : total output classes
-
-     Returns:
-         model   : trained Keras model
-         history : training history
-     """
      if kernel_sizes is None:
          kernel_sizes = KERNEL_SIZES
 
-     # Guard: lookback must be >= largest kernel
-     lookback = X_train.shape[1]
-     valid_kernels = [k for k in kernel_sizes if k <= lookback]
-     if not valid_kernels:
-         valid_kernels = [min(3, lookback)]
-
-     model = build_multiscale_cnn_lstm(
-         input_shape=X_train.shape[1:],
-         n_classes=n_classes,
-         kernel_sizes=valid_kernels,
-         dropout=dropout,
-         lstm_units=lstm_units,
-     )
 
      history = model.fit(
          X_train, y_train,
          validation_data=(X_val, y_val),
          epochs=epochs,
          batch_size=batch_size,
          callbacks=get_callbacks(),
          verbose=0,
      )
-
      return model, history
 
 
  def predict_approach3(model, X_test: np.ndarray) -> tuple:
-     """Predict on test set. Returns (class_preds, proba)."""
      proba = model.predict(X_test, verbose=0)
-     preds = np.argmax(proba, axis=1)
-     return preds, proba
22
 
23
+ inputs = keras.Input(shape=input_shape, name="multiscale_input")
24
+ towers = []
25
 
 
26
  for k in kernel_sizes:
27
+ t = keras.layers.Conv1D(filters, k, padding="causal", activation="relu",
28
+ name=f"conv1_k{k}")(inputs)
 
 
 
29
  t = keras.layers.BatchNormalization(name=f"bn1_k{k}")(t)
30
+ t = keras.layers.Conv1D(filters, k, padding="causal", activation="relu",
31
+ name=f"conv2_k{k}")(t)
 
 
32
  t = keras.layers.BatchNormalization(name=f"bn2_k{k}")(t)
33
  t = keras.layers.Dropout(dropout, name=f"drop_k{k}")(t)
34
  towers.append(t)
35
 
36
+ merged = keras.layers.Concatenate(axis=-1)(towers) if len(towers) > 1 else towers[0]
37
+ x = keras.layers.LSTM(lstm_units, dropout=dropout, recurrent_dropout=0.1)(merged)
 
 
 
 
 
 
 
 
38
  outputs = classification_head(x, n_classes, dropout)
39
 
40
+ model = keras.Model(inputs, outputs, name="Approach3_MultiScale")
41
  model.compile(
42
+ optimizer=keras.optimizers.Adam(1e-3),
43
  loss="sparse_categorical_crossentropy",
44
  metrics=["accuracy"],
45
  )
46
  return model
47
 
48
 
 
 
49
  def train_approach3(
50
+ X_train, y_train, X_val, y_val,
51
+ n_classes, epochs=100, batch_size=32,
52
+ dropout=0.3, lstm_units=128, kernel_sizes=None,
 
 
 
 
 
53
  ):
54
+ from models.base import get_callbacks, compute_class_weights
55
+
 
 
 
 
 
 
 
 
 
 
56
  if kernel_sizes is None:
57
  kernel_sizes = KERNEL_SIZES
58
 
59
+ lookback = X_train.shape[1]
60
+ valid_kernels = [k for k in kernel_sizes if k <= lookback] or [min(3, lookback)]
61
+ model = build_multiscale_cnn_lstm(
62
+ X_train.shape[1:], n_classes, valid_kernels, dropout=dropout, lstm_units=lstm_units,
 
 
 
 
 
 
 
 
63
  )
64
+ cw = compute_class_weights(y_train, n_classes)
65
 
66
  history = model.fit(
67
  X_train, y_train,
68
  validation_data=(X_val, y_val),
69
  epochs=epochs,
70
  batch_size=batch_size,
71
+ class_weight=cw,
72
  callbacks=get_callbacks(),
73
  verbose=0,
74
  )
 
75
  return model, history
76
 
77
 
78
  def predict_approach3(model, X_test: np.ndarray) -> tuple:
 
79
  proba = model.predict(X_test, verbose=0)
80
+ return np.argmax(proba, axis=1), proba
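As a shape sanity check on the tower merge: causal Conv1D preserves the time axis, so the three towers differ only in receptive field and stack cleanly along features. A minimal standalone sketch with hypothetical shapes (lookback 30, 12 features):

    import numpy as np
    from tensorflow import keras

    lookback, n_features, filters = 30, 12, 32   # hypothetical shapes
    inp = keras.Input(shape=(lookback, n_features))
    towers = [keras.layers.Conv1D(filters, k, padding="causal")(inp) for k in (3, 7, 21)]
    merged = keras.layers.Concatenate(axis=-1)(towers)
    m = keras.Model(inp, merged)
    out = m(np.zeros((1, lookback, n_features), dtype="float32"))
    print(out.shape)  # (1, 30, 96): 32 filters × 3 towers, time axis intact for the LSTM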
 
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/ui/components.py CHANGED
@@ -227,3 +227,46 @@ def show_audit_trail(audit_trail: list):
          {"selector": "td", "props": [("padding", "10px")]},
      ])
      st.dataframe(styled, use_container_width=True, height=500)
+
+
+ # ── All models' next-day signals panel ────────────────────────────────────────
+
+ def show_all_signals_panel(all_signals: dict, target_etfs: list, include_cash: bool, next_date):
+     """
+     Compact panel showing what each model predicts for the next trading day,
+     with the top probability displayed.
+     """
+     APPROACH_COLORS = {
+         "Approach 1": "#00ffc8",
+         "Approach 2": "#7c6aff",
+         "Approach 3": "#ff6b6b",
+     }
+
+     st.subheader(f"🗓️ All Models — {next_date.strftime('%Y-%m-%d')} Signals")
+
+     cols = st.columns(len(all_signals))
+     for col, (name, info) in zip(cols, all_signals.items()):
+         color = APPROACH_COLORS.get(name, "#888888")
+         signal = info["signal"]
+         proba = info["proba"]
+         top_prob = float(np.max(proba)) * 100
+         is_winner = info["is_winner"]
+         border = f"3px solid {color}"
+         badge = " ⭐ WINNER" if is_winner else ""
+
+         col.markdown(f"""
+         <div style="border:{border}; border-radius:12px; padding:18px 16px;
+                     background:#111118; text-align:center;">
+           <div style="color:{color}; font-size:11px; font-weight:700;
+                       letter-spacing:2px; margin-bottom:6px;">
+             {name.upper()}{badge}
+           </div>
+           <div style="color:white; font-size:28px; font-weight:800;
+                       margin:8px 0;">
+             {signal}
+           </div>
+           <div style="color:#aaa; font-size:12px;">
+             Top prob: <span style="color:{color}; font-weight:700;">{top_prob:.1f}%</span>
+           </div>
+         </div>
+         """, unsafe_allow_html=True)
hf_space/hf_space/hf_space/hf_space/hf_space/strategy/backtest.py CHANGED
@@ -55,6 +55,8 @@ def execute_strategy(
          cls = min(cls, n_etfs - 1)
          signal_etf = target_etfs[cls].replace("_Ret", "")
          realized_ret = float(y_raw_test[i][cls])
+         # Sanity clip: daily returns should never exceed ±50%
+         realized_ret = max(-0.50, min(0.50, realized_ret))

          net_ret = realized_ret - (fee_bps / 10000)
          strat_rets.append(net_ret)
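A toy check of the per-day arithmetic above (numbers are illustrative):

    fee_bps = 5                                    # 5 basis points per trade
    fee = fee_bps / 10000                          # 0.0005, bps to decimal
    realized_ret = 0.82                            # corrupt data point, e.g. a bad split adjustment
    clipped = max(-0.50, min(0.50, realized_ret))  # 0.50, the ±50% sanity bound
    net_ret = clipped - fee                        # 0.4995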
hf_space/hf_space/hf_space/ui/components.py CHANGED
@@ -1,43 +1,35 @@
  """
  ui/components.py
- Reusable Streamlit UI blocks:
- - Freshness warning banner
- - Next trading day signal banner
- - Signal conviction panel
- - Metrics row
- - Audit trail table
- - Comparison summary table
+ Reusable Streamlit UI blocks.
+ - Fixed applymap → map deprecation
+ - Removed debug expanders
+ - Added show_all_signals_panel
  """

  import streamlit as st
  import pandas as pd
  import numpy as np
-
  from signals.conviction import conviction_color, conviction_icon


- # ── Freshness warning ─────────────────────────────────────────────────────────
+ # ── Freshness status ──────────────────────────────────────────────────────────

  def show_freshness_status(freshness: dict):
-     """Display data freshness status. Stops app if data is stale."""
      if freshness.get("fresh"):
          st.success(freshness["message"])
      else:
          st.warning(freshness["message"])


- # ── Next trading day banner ───────────────────────────────────────────────────
+ # ── Winner signal banner ──────────────────────────────────────────────────────

  def show_signal_banner(next_signal: str, next_date, approach_name: str):
-     """Large coloured banner showing the winning approach's next signal."""
      is_cash = next_signal == "CASH"
-     bg = "linear-gradient(135deg, #2d3436 0%, #1a1a2e 100%)" if is_cash else \
-          "linear-gradient(135deg, #00d1b2 0%, #00a896 100%)"
-
+     bg = ("linear-gradient(135deg, #2d3436 0%, #1a1a2e 100%)" if is_cash
+           else "linear-gradient(135deg, #00d1b2 0%, #00a896 100%)")
      st.markdown(f"""
      <div style="background:{bg}; padding:25px; border-radius:15px;
-                 text-align:center; box-shadow:0 8px 16px rgba(0,0,0,0.3);
-                 margin:16px 0;">
+                 text-align:center; box-shadow:0 8px 16px rgba(0,0,0,0.3); margin:16px 0;">
        <div style="color:rgba(255,255,255,0.7); font-size:12px;
                    letter-spacing:3px; margin-bottom:6px;">
          {approach_name.upper()} · NEXT TRADING DAY SIGNAL
@@ -50,78 +42,98 @@ def show_signal_banner(next_signal: str, next_date, approach_name: str):
      """, unsafe_allow_html=True)


+ # ── All models signals panel ──────────────────────────────────────────────────
+
+ def show_all_signals_panel(all_signals: dict, target_etfs: list,
+                            include_cash: bool, next_date, optimal_lookback: int):
+     APPROACH_COLORS = {
+         "Approach 1": "#00ffc8",
+         "Approach 2": "#7c6aff",
+         "Approach 3": "#ff6b6b",
+     }
+
+     st.subheader(f"🗓️ All Models — {next_date.strftime('%Y-%m-%d')} Signals")
+     st.caption(f"📐 Optimal lookback: **{optimal_lookback}d** (auto-selected from 30/45/60)")
+
+     cols = st.columns(len(all_signals))
+     for col, (name, info) in zip(cols, all_signals.items()):
+         color = APPROACH_COLORS.get(name, "#888888")
+         signal = info["signal"]
+         proba = info["proba"]
+         top_prob = float(np.max(proba)) * 100
+         is_winner = info["is_winner"]
+         badge = " ⭐" if is_winner else ""
+
+         col.markdown(f"""
+         <div style="border:2px solid {color}; border-radius:12px; padding:18px 16px;
+                     background:#111118; text-align:center; margin-bottom:8px;">
+           <div style="color:{color}; font-size:10px; font-weight:700;
+                       letter-spacing:2px; margin-bottom:6px;">
+             {name.upper()}{badge}
+           </div>
+           <div style="color:white; font-size:30px; font-weight:800; margin:8px 0;">
+             {signal}
+           </div>
+           <div style="color:#aaa; font-size:12px;">
+             Confidence: <span style="color:{color}; font-weight:700;">{top_prob:.1f}%</span>
+           </div>
+         </div>
+         """, unsafe_allow_html=True)
+
+
  # ── Signal conviction panel ───────────────────────────────────────────────────

  def show_conviction_panel(conviction: dict):
-     """
-     White-background conviction panel with Z-score gauge and per-ETF bars.
-     Uses separate st.markdown calls per ETF row to avoid Streamlit HTML escaping.
-     """
      label = conviction["label"]
      z_score = conviction["z_score"]
      best_name = conviction["best_name"]
      sorted_pairs = conviction["sorted_pairs"]
-
      color = conviction_color(label)
      icon = conviction_icon(label)

      z_clipped = max(-3.0, min(3.0, z_score))
      bar_pct = int((z_clipped + 3) / 6 * 100)
-
-     max_score = max(s for _, s in sorted_pairs) if sorted_pairs else 1.0
+     max_score = max((s for _, s in sorted_pairs), default=1.0)
      if max_score <= 0:
          max_score = 1.0

-     # ── Header + gauge ────────────────────────────────────────────────────────
      st.markdown(f"""
      <div style="background:#ffffff; border:1px solid #ddd;
                  border-left:5px solid {color}; border-radius:12px 12px 0 0;
                  padding:18px 24px 12px 24px; margin:12px 0 0 0;
                  box-shadow:0 2px 8px rgba(0,0,0,0.07);">
-
-       <div style="display:flex; align-items:center; gap:12px;
-                   margin-bottom:14px; flex-wrap:wrap;">
+       <div style="display:flex; align-items:center; gap:12px; margin-bottom:14px; flex-wrap:wrap;">
          <span style="font-size:20px;">{icon}</span>
          <span style="font-size:18px; font-weight:700; color:#1a1a1a;">Signal Conviction</span>
-         <span style="background:#f0f0f0; border:1px solid {color};
-                      color:{color}; font-weight:700; font-size:14px;
-                      padding:3px 12px; border-radius:8px;">
+         <span style="background:#f0f0f0; border:1px solid {color}; color:{color};
+                      font-weight:700; font-size:14px; padding:3px 12px; border-radius:8px;">
            Z = {z_score:.2f} &sigma;
          </span>
          <span style="margin-left:auto; background:{color}; color:#fff;
-                      font-weight:700; padding:4px 16px;
-                      border-radius:20px; font-size:13px;">
+                      font-weight:700; padding:4px 16px; border-radius:20px; font-size:13px;">
            {label}
          </span>
        </div>
-
        <div style="display:flex; justify-content:space-between;
                    font-size:11px; color:#999; margin-bottom:4px;">
-         <span>Weak &minus;3&sigma;</span>
-         <span>Neutral 0&sigma;</span>
-         <span>Strong +3&sigma;</span>
+         <span>Weak &minus;3&sigma;</span><span>Neutral 0&sigma;</span><span>Strong +3&sigma;</span>
        </div>
-       <div style="background:#f0f0f0; border-radius:8px; height:14px;
-                   overflow:hidden; position:relative; border:1px solid #e0e0e0;
-                   margin-bottom:14px;">
-         <div style="position:absolute; left:50%; top:0; width:2px;
-                     height:100%; background:#ccc;"></div>
+       <div style="background:#f0f0f0; border-radius:8px; height:14px; overflow:hidden;
+                   position:relative; border:1px solid #e0e0e0; margin-bottom:14px;">
+         <div style="position:absolute; left:50%; top:0; width:2px; height:100%; background:#ccc;"></div>
          <div style="width:{bar_pct}%; height:100%;
-                     background:linear-gradient(90deg,#fab1a0,{color});
-                     border-radius:8px;"></div>
+                     background:linear-gradient(90deg,#fab1a0,{color}); border-radius:8px;"></div>
        </div>
-
        <div style="font-size:12px; color:#999; margin-bottom:2px;">
          Model probability by ETF (ranked high &rarr; low):
        </div>
      </div>
      """, unsafe_allow_html=True)

-     # ── Per-ETF rows ──────────────────────────────────────────────────────────
      for i, (name, score) in enumerate(sorted_pairs):
          is_winner = (name == best_name)
          is_last = (i == len(sorted_pairs) - 1)
          bar_w = int(score / max_score * 100)
          name_style = "font-weight:700; color:#00897b;" if is_winner else "color:#444;"
          bar_color = color if is_winner else "#b2dfdb" if score > max_score * 0.5 else "#e0e0e0"
          star = " ★" if is_winner else ""
@@ -134,10 +146,9 @@ def show_conviction_panel(conviction: dict):
                      box-shadow:0 2px 8px rgba(0,0,0,0.07);">
            <div style="display:flex; align-items:center; gap:12px;">
              <span style="width:44px; text-align:right; font-size:13px; {name_style}">{name}{star}</span>
-             <div style="flex:1; background:#f5f5f5; border-radius:4px;
-                         height:14px; overflow:hidden; border:1px solid #e8e8e8;">
-               <div style="width:{bar_w}%; height:100%;
-                           background:{bar_color}; border-radius:4px;"></div>
+             <div style="flex:1; background:#f5f5f5; border-radius:4px; height:14px;
+                         overflow:hidden; border:1px solid #e8e8e8;">
+               <div style="width:{bar_w}%; height:100%; background:{bar_color}; border-radius:4px;"></div>
              </div>
              <span style="width:56px; font-size:12px; color:#888; text-align:right;">{score:.4f}</span>
            </div>
@@ -145,7 +156,7 @@ def show_conviction_panel(conviction: dict):
      """, unsafe_allow_html=True)

      st.caption(
-         "Z-score = std deviations the top ETF's probability sits above the mean of all ETF probabilities. "
+         "Z-score = std deviations the top ETF's probability sits above the mean. "
          "Higher → model is more decisive."
      )

@@ -153,60 +164,44 @@ def show_conviction_panel(conviction: dict):
  # ── Metrics row ───────────────────────────────────────────────────────────────

  def show_metrics_row(result: dict, tbill_rate: float):
-     """Five-column metric display."""
      col1, col2, col3, col4, col5 = st.columns(5)
-
-     col1.metric(
-         "📈 Annualised Return",
-         f"{result['ann_return']*100:.2f}%",
-         delta=f"vs T-bill: {(result['ann_return'] - tbill_rate)*100:.2f}%",
-     )
-     col2.metric(
-         "📊 Sharpe Ratio",
-         f"{result['sharpe']:.2f}",
-         delta="Risk-Adjusted" if result['sharpe'] > 1 else "Below Threshold",
-     )
-     col3.metric(
-         "🎯 Hit Ratio (15d)",
-         f"{result['hit_ratio']*100:.0f}%",
-         delta="Strong" if result['hit_ratio'] > 0.6 else "Weak",
-     )
-     col4.metric(
-         "📉 Max Drawdown",
-         f"{result['max_dd']*100:.2f}%",
-         delta="Peak to Trough",
-     )
-     col5.metric(
-         "⚠️ Max Daily DD",
-         f"{result['max_daily_dd']*100:.2f}%",
-         delta="Worst Day",
-     )
+     col1.metric("📈 Ann. Return", f"{result['ann_return']*100:.2f}%",
+                 delta=f"vs T-bill: {(result['ann_return'] - tbill_rate)*100:.2f}%")
+     col2.metric("📊 Sharpe", f"{result['sharpe']:.2f}",
+                 delta="Strong" if result['sharpe'] > 1 else "Weak")
+     col3.metric("🎯 Hit Ratio 15d", f"{result['hit_ratio']*100:.0f}%",
+                 delta="Good" if result['hit_ratio'] > 0.55 else "Weak")
+     col4.metric("📉 Max Drawdown", f"{result['max_dd']*100:.2f}%",
+                 delta="Peak to Trough")
+     col5.metric("⚠️ Max Daily DD", f"{result['max_daily_dd']*100:.2f}%",
+                 delta="Worst Day")


  # ── Comparison table ──────────────────────────────────────────────────────────

  def show_comparison_table(comparison_df: pd.DataFrame):
-     """Styled comparison table for all three approaches."""
      def highlight_winner(row):
          if "WINNER" in str(row.get("Winner", "")):
              return ["background-color: rgba(0,200,150,0.15); font-weight:bold"] * len(row)
          return [""] * len(row)

-     styled = comparison_df.style.apply(highlight_winner, axis=1).set_properties(**{
-         "text-align": "center",
-         "font-size": "14px",
-     }).set_table_styles([
-         {"selector": "th", "props": [("font-size", "14px"), ("font-weight", "bold"),
-                                      ("text-align", "center")]},
-         {"selector": "td", "props": [("padding", "10px")]},
-     ])
+     styled = (
+         comparison_df.style
+         .apply(highlight_winner, axis=1)
+         .set_properties(**{"text-align": "center", "font-size": "14px"})
+         .set_table_styles([
+             {"selector": "th", "props": [("font-size", "14px"),
+                                          ("font-weight", "bold"),
+                                          ("text-align", "center")]},
+             {"selector": "td", "props": [("padding", "10px")]},
+         ])
+     )
      st.dataframe(styled, use_container_width=True)


  # ── Audit trail ───────────────────────────────────────────────────────────────

  def show_audit_trail(audit_trail: list):
-     """Last 20 days styled audit trail."""
      if not audit_trail:
          st.info("No audit trail data available.")
          return
@@ -214,59 +209,19 @@ def show_audit_trail(audit_trail: list):
      df = pd.DataFrame(audit_trail).tail(20)[["Date", "Signal", "Net_Return"]]

      def color_return(val):
-         return "color: #00c896; font-weight:bold" if val > 0 else "color: #ff4b4b; font-weight:bold"
-
-     styled = df.style.applymap(color_return, subset=["Net_Return"]).format(
-         {"Net_Return": "{:.2%}"}
-     ).set_properties(**{
-         "font-size": "16px",
-         "text-align": "center",
-     }).set_table_styles([
-         {"selector": "th", "props": [("font-size", "16px"), ("font-weight", "bold"),
-                                      ("text-align", "center")]},
-         {"selector": "td", "props": [("padding", "10px")]},
-     ])
+         return ("color: #00c896; font-weight:bold" if val > 0
+                 else "color: #ff4b4b; font-weight:bold")
+
+     styled = (
+         df.style
+         .map(color_return, subset=["Net_Return"])
+         .format({"Net_Return": "{:.2%}"})
+         .set_properties(**{"font-size": "14px", "text-align": "center"})
+         .set_table_styles([
+             {"selector": "th", "props": [("font-size", "14px"),
+                                          ("font-weight", "bold"),
+                                          ("text-align", "center")]},
+             {"selector": "td", "props": [("padding", "10px")]},
+         ])
+     )
      st.dataframe(styled, use_container_width=True, height=500)
-
-
- # ── All models' next day signals panel ────────────────────────────────────────
-
- def show_all_signals_panel(all_signals: dict, target_etfs: list, include_cash: bool, next_date):
-     """
-     Compact panel showing what each model predicts for next trading day,
-     with top probability displayed.
-     """
-     APPROACH_COLORS = {
-         "Approach 1": "#00ffc8",
-         "Approach 2": "#7c6aff",
-         "Approach 3": "#ff6b6b",
-     }
-
-     st.subheader(f"🗓️ All Models — {next_date.strftime('%Y-%m-%d')} Signals")
-
-     cols = st.columns(len(all_signals))
-     for col, (name, info) in zip(cols, all_signals.items()):
-         color = APPROACH_COLORS.get(name, "#888888")
-         signal = info["signal"]
-         proba = info["proba"]
-         top_prob = float(np.max(proba)) * 100
-         is_winner = info["is_winner"]
-         border = f"3px solid {color}"
-         badge = " ⭐ WINNER" if is_winner else ""
-
-         col.markdown(f"""
-         <div style="border:{border}; border-radius:12px; padding:18px 16px;
-                     background:#111118; text-align:center;">
-           <div style="color:{color}; font-size:11px; font-weight:700;
-                       letter-spacing:2px; margin-bottom:6px;">
-             {name.upper()}{badge}
-           </div>
-           <div style="color:white; font-size:28px; font-weight:800;
-                       margin:8px 0;">
-             {signal}
-           </div>
-           <div style="color:#aaa; font-size:12px;">
-             Top prob: <span style="color:{color}; font-weight:700;">{top_prob:.1f}%</span>
-           </div>
-         </div>
-         """, unsafe_allow_html=True)
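The applymap → map change tracks pandas itself: Styler.applymap was deprecated in pandas 2.1 in favour of the elementwise Styler.map with identical semantics. A minimal standalone check:

    import pandas as pd

    df = pd.DataFrame({"Net_Return": [0.012, -0.004]})
    styled = (
        df.style
        .map(lambda v: "color: #00c896" if v > 0 else "color: #ff4b4b",
             subset=["Net_Return"])            # per-cell styling, formerly applymap
        .format({"Net_Return": "{:.2%}"})
    )
    html = styled.to_html()                    # renders one colour rule per cell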
hf_space/hf_space/ui/charts.py CHANGED
@@ -1,22 +1,15 @@
  """
  ui/charts.py
- All Plotly chart builders for the Streamlit UI.
+ Plotly chart builders.
+ Equity curve: winner + SPY + AGG only. Y-axis as % growth (not raw multiplier).
  """

  import numpy as np
  import pandas as pd
  import plotly.graph_objects as go

-
- APPROACH_COLOURS = {
-     "Approach 1": "#00ffc8",
-     "Approach 2": "#7c6aff",
-     "Approach 3": "#ff6b6b",
- }
- BENCHMARK_COLOURS = {
-     "SPY": "#ff4b4b",
-     "AGG": "#ffa500",
- }
+ WINNER_COLOUR = "#00ffc8"
+ BENCHMARK_COLOURS = {"SPY": "#ff4b4b", "AGG": "#ffa500"}


  def equity_curve_chart(
@@ -28,117 +21,72 @@ def equity_curve_chart(
      tbill_rate: float,
  ) -> go.Figure:
      """
-     Equity curve chart showing all three approaches + SPY + AGG benchmarks.
-
-     Args:
-         results     : {approach_name: result_dict}
-         winner_name : highlighted approach
-         plot_dates  : DatetimeIndex for x-axis
-         df          : full DataFrame (for benchmark columns)
-         test_slice  : slice object to extract test-period benchmark returns
-         tbill_rate  : for benchmark metric calculation
+     Equity curve: winner strategy vs SPY and AGG.
+     Y-axis shows % growth (cum_return - 1) * 100 for readability.
      """
      from strategy.backtest import compute_benchmark_metrics

      fig = go.Figure()

-     # ── Strategy lines ────────────────────────────────────────────────────────
-     for name, res in results.items():
-         if res is None:
-             continue
-         colour = APPROACH_COLOURS.get(name, "#aaaaaa")
-         width = 3 if name == winner_name else 1.5
-         dash = "solid" if name == winner_name else "dot"
-
-         n = min(len(res["cum_returns"]), len(plot_dates))
-
+     # ── Winner strategy ───────────────────────────────────────────────────────
+     winner_res = results.get(winner_name)
+     if winner_res is not None:
+         cum = winner_res["cum_returns"]
+         n = min(len(cum), len(plot_dates))
          fig.add_trace(go.Scatter(
              x=plot_dates[:n],
-             y=res["cum_returns"][:n],
+             y=(cum[:n] - 1) * 100,
              mode="lines",
-             name=f"{name} {'★' if name == winner_name else ''}",
-             line=dict(color=colour, width=width, dash=dash),
-             fill="tozeroy" if name == winner_name else None,
-             fillcolor=f"rgba({_hex_to_rgb(colour)},0.07)" if name == winner_name else None,
+             name=f"{winner_name} ★",
+             line=dict(color=WINNER_COLOUR, width=2.5),
+             fill="tozeroy",
+             fillcolor="rgba(0,255,200,0.07)",
          ))

-     # ── Benchmark: SPY ────────────────────────────────────────────────────────
+     # ── SPY benchmark ─────────────────────────────────────────────────────────
      if "SPY_Ret" in df.columns:
-         spy_rets = df["SPY_Ret"].iloc[test_slice].values
+         spy_rets = df["SPY_Ret"].iloc[test_slice].values.copy()
+         spy_rets = np.clip(spy_rets, -0.5, 0.5)  # sanity clip
+         spy_rets = spy_rets[~np.isnan(spy_rets)]
          n = min(len(spy_rets), len(plot_dates))
          spy_m = compute_benchmark_metrics(spy_rets[:n], tbill_rate)
          fig.add_trace(go.Scatter(
              x=plot_dates[:n],
-             y=spy_m["cum_returns"],
+             y=(spy_m["cum_returns"] - 1) * 100,
              mode="lines",
-             name="SPY (Equity BM)",
+             name="SPY",
              line=dict(color=BENCHMARK_COLOURS["SPY"], width=1.5, dash="dot"),
          ))

-     # ── Benchmark: AGG ────────────────────────────────────────────────────────
+     # ── AGG benchmark ─────────────────────────────────────────────────────────
      if "AGG_Ret" in df.columns:
-         agg_rets = df["AGG_Ret"].iloc[test_slice].values
+         agg_rets = df["AGG_Ret"].iloc[test_slice].values.copy()
+         agg_rets = np.clip(agg_rets, -0.5, 0.5)
+         agg_rets = agg_rets[~np.isnan(agg_rets)]
          n = min(len(agg_rets), len(plot_dates))
          agg_m = compute_benchmark_metrics(agg_rets[:n], tbill_rate)
          fig.add_trace(go.Scatter(
              x=plot_dates[:n],
-             y=agg_m["cum_returns"],
+             y=(agg_m["cum_returns"] - 1) * 100,
              mode="lines",
-             name="AGG (Bond BM)",
+             name="AGG",
              line=dict(color=BENCHMARK_COLOURS["AGG"], width=1.5, dash="dot"),
          ))

      fig.update_layout(
          template="plotly_dark",
-         height=460,
+         height=420,
          hovermode="x unified",
-         showlegend=True,
          legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01, font=dict(size=11)),
          xaxis_title="Date",
-         yaxis_title="Cumulative Return (×)",
+         yaxis_title="Cumulative Return (%)",
          margin=dict(l=50, r=30, t=20, b=50),
+         yaxis=dict(ticksuffix="%"),
      )
      return fig


- def comparison_bar_chart(results: dict, winner_name: str) -> go.Figure:
-     """
-     Horizontal bar chart comparing annualised returns across all three approaches.
-     """
-     names = []
-     returns = []
-     colours = []
-
-     for name, res in results.items():
-         if res is None:
-             continue
-         names.append(name)
-         returns.append(res["ann_return"] * 100)
-         colours.append(APPROACH_COLOURS.get(name, "#aaaaaa"))
-
-     fig = go.Figure(go.Bar(
-         x=returns,
-         y=names,
-         orientation="h",
-         marker_color=colours,
-         text=[f"{r:.1f}%" for r in returns],
-         textposition="auto",
-     ))
-
-     fig.update_layout(
-         template="plotly_dark",
-         height=200,
-         xaxis_title="Annualised Return (%)",
-         margin=dict(l=100, r=30, t=10, b=40),
-         showlegend=False,
-     )
-     return fig
-
-
- # ── Helper ────────────────────────────────────────────────────────────────────
-
  def _hex_to_rgb(hex_color: str) -> str:
-     """Convert #rrggbb to 'r,g,b' string for rgba()."""
      h = hex_color.lstrip("#")
      r, g, b = int(h[0:2], 16), int(h[2:4], 16), int(h[4:6], 16)
      return f"{r},{g},{b}"
hf_space/models/base.py CHANGED
@@ -1,28 +1,53 @@
  """
  models/base.py
- Shared utilities for all three CNN-LSTM variants.
- Key fix: class_weight support to prevent majority-class collapse.
+ Shared utilities for all CNN-LSTM variants.
+ Optimised for CPU training on HF Spaces.
  """

  import numpy as np
- import pandas as pd
+ import hashlib
+ import pickle
+ import os
+ from pathlib import Path
  from sklearn.preprocessing import RobustScaler
  from sklearn.utils.class_weight import compute_class_weight
- import tensorflow as tf
- from tensorflow import keras

- SEED = 42
- tf.random.set_seed(SEED)
+ SEED = 42
+ CACHE_DIR = Path("/tmp/p2_model_cache")
+ CACHE_DIR.mkdir(exist_ok=True)
+
  np.random.seed(SEED)


+ # ── Cache helpers ─────────────────────────────────────────────────────────────
+
+ def make_cache_key(last_date: str, start_yr: int, fee_bps: int,
+                    epochs: int, split: str, include_cash: bool,
+                    lookback: int) -> str:
+     raw = f"{last_date}_{start_yr}_{fee_bps}_{epochs}_{split}_{include_cash}_{lookback}"
+     return hashlib.md5(raw.encode()).hexdigest()
+
+
+ def save_cache(key: str, payload: dict):
+     path = CACHE_DIR / f"{key}.pkl"
+     with open(path, "wb") as f:
+         pickle.dump(payload, f)
+
+
+ def load_cache(key: str) -> dict | None:
+     path = CACHE_DIR / f"{key}.pkl"
+     if path.exists():
+         try:
+             with open(path, "rb") as f:
+                 return pickle.load(f)
+         except Exception:
+             path.unlink(missing_ok=True)
+     return None
+
+
  # ── Sequence builder ──────────────────────────────────────────────────────────

  def build_sequences(features: np.ndarray, targets: np.ndarray, lookback: int):
-     """
-     Build supervised sequences for CNN-LSTM input.
-     X[i] = features[i : i+lookback] → predicts y[i+lookback]
-     """
      X, y = [], []
      for i in range(lookback, len(features)):
          X.append(features[i - lookback: i])
@@ -36,35 +61,25 @@ def train_val_test_split(X, y, train_pct=0.70, val_pct=0.15):
      n = len(X)
      t1 = int(n * train_pct)
      t2 = int(n * (train_pct + val_pct))
-     return (
-         X[:t1], y[:t1],
-         X[t1:t2], y[t1:t2],
-         X[t2:], y[t2:],
-     )
+     return X[:t1], y[:t1], X[t1:t2], y[t1:t2], X[t2:], y[t2:]


  # ── Feature scaling ───────────────────────────────────────────────────────────

  def scale_features(X_train, X_val, X_test):
      n_feat = X_train.shape[2]
      scaler = RobustScaler()
      scaler.fit(X_train.reshape(-1, n_feat))
-
      def _t(X):
          s = X.shape
          return scaler.transform(X.reshape(-1, n_feat)).reshape(s)
-
      return _t(X_train), _t(X_val), _t(X_test), scaler


  # ── Label builder ─────────────────────────────────────────────────────────────

  def returns_to_labels(y_raw, include_cash=True, cash_threshold=0.0):
-     """
-     Assign label = argmax(returns).
-     If include_cash and best return < cash_threshold → label = n_etfs (CASH).
-     """
      best = np.argmax(y_raw, axis=1)
      if include_cash:
          best_ret = y_raw[np.arange(len(y_raw)), best]
          cash_idx = y_raw.shape[1]
@@ -77,35 +92,22 @@ def returns_to_labels(y_raw, include_cash=True, cash_threshold=0.0):
  # ── Class weights ─────────────────────────────────────────────────────────────

  def compute_class_weights(y_labels: np.ndarray, n_classes: int) -> dict:
-     """
-     Compute balanced class weights to counteract majority-class collapse.
-     Returns dict {class_index: weight} for use in model.fit().
-     """
-     classes = np.arange(n_classes)
      present = np.unique(y_labels)
-
      try:
-         weights = compute_class_weight(
-             class_weight="balanced",
-             classes=present,
-             y=y_labels,
-         )
+         weights = compute_class_weight("balanced", classes=present, y=y_labels)
          weight_dict = {int(c): float(w) for c, w in zip(present, weights)}
      except Exception:
          weight_dict = {}
-
-     # Fill any missing classes with weight 1.0
-     for c in classes:
+     for c in range(n_classes):
          if c not in weight_dict:
              weight_dict[c] = 1.0
-
      return weight_dict


  # ── Callbacks ─────────────────────────────────────────────────────────────────

- def get_callbacks(patience_es=20, patience_lr=10, min_lr=1e-6):
-     """Longer patience to allow models time to learn past majority class."""
+ def get_callbacks(patience_es=15, patience_lr=8, min_lr=1e-6):
+     from tensorflow import keras
      return [
          keras.callbacks.EarlyStopping(
              monitor="val_loss",
@@ -123,51 +125,76 @@ def get_callbacks(patience_es=15, patience_lr=8, min_lr=1e-6):
      ]


- # ── Output head ───────────────────────────────────────────────────────────────
+ # ── Lightweight output head (CPU-optimised) ───────────────────────────────────

  def classification_head(x, n_classes: int, dropout: float = 0.3):
+     """Smaller head than original — faster on CPU, less overfitting risk."""
+     from tensorflow import keras
-     x = keras.layers.Dense(64, activation="relu")(x)
-     x = keras.layers.BatchNormalization()(x)
-     x = keras.layers.Dropout(dropout)(x)
      x = keras.layers.Dense(32, activation="relu")(x)
-     x = keras.layers.Dropout(dropout / 2)(x)
+     x = keras.layers.Dropout(dropout)(x)
      x = keras.layers.Dense(n_classes, activation="softmax")(x)
      return x


- # ── Prediction ────────────────────────────────────────────────────────────────
-
- def predict_classes(model, X_test: np.ndarray) -> tuple:
-     proba = model.predict(X_test, verbose=0)
-     return np.argmax(proba, axis=1), proba
-
-
- # ── Metrics ───────────────────────────────────────────────────────────────────
-
- def evaluate_returns(
-     preds, proba, y_raw_test, target_etfs, tbill_rate, fee_bps, include_cash=True,
- ):
-     n_etfs = len(target_etfs)
-     daily_tbill = tbill_rate / 252
-     strat_rets = []
-
-     for i, cls in enumerate(preds):
-         if include_cash and cls == n_etfs:
-             net = daily_tbill - fee_bps / 10000
-         else:
-             cls = min(int(cls), n_etfs - 1)
-             net = float(y_raw_test[i][cls]) - fee_bps / 10000
-         strat_rets.append(net)
-
-     strat_rets = np.array(strat_rets)
-     cum_returns = np.cumprod(1 + strat_rets)
-     ann_return = cum_returns[-1] ** (252 / len(strat_rets)) - 1
-
-     last_proba = proba[-1]
-     next_cls = int(np.argmax(last_proba))
-     next_etf = (
-         "CASH" if (include_cash and next_cls == n_etfs)
-         else target_etfs[min(next_cls, n_etfs - 1)].replace("_Ret", "")
-     )
-
-     return strat_rets, ann_return, cum_returns, last_proba, next_etf
+ # ── Auto lookback selection ───────────────────────────────────────────────────
+
+ def find_best_lookback(
+     X_raw: np.ndarray,
+     y_raw: np.ndarray,
+     y_labels_fn,
+     train_pct: float,
+     val_pct: float,
+     n_classes: int,
+     include_cash: bool,
+     candidates: list = None,
+ ):
+     """
+     Train a fast lightweight CNN on each lookback candidate using val loss.
+     Returns best lookback int.
+     Uses only Approach 1 architecture (fastest) to pick the winner.
+     """
+     from tensorflow import keras
+
+     if candidates is None:
+         candidates = [30, 45, 60]
+
+     best_lb = candidates[0]
+     best_loss = np.inf
+
+     for lb in candidates:
+         try:
+             X_seq, y_seq = build_sequences(X_raw, y_raw, lb)
+             y_lab = y_labels_fn(y_seq)
+
+             X_tr, y_tr, X_v, y_v, _, _ = train_val_test_split(X_seq, y_lab, train_pct, val_pct)
+             X_tr_s, X_v_s, _, _ = scale_features(X_tr, X_v, X_v)
+
+             cw = compute_class_weights(y_tr, n_classes)
+
+             # Tiny fast model just for lookback selection
+             inp = keras.Input(shape=X_tr_s.shape[1:])
+             x = keras.layers.Conv1D(16, min(3, lb), padding="causal", activation="relu")(inp)
+             x = keras.layers.GlobalAveragePooling1D()(x)
+             out = keras.layers.Dense(n_classes, activation="softmax")(x)
+             m = keras.Model(inp, out)
+             m.compile(optimizer="adam", loss="sparse_categorical_crossentropy")
+
+             hist = m.fit(
+                 X_tr_s, y_tr,
+                 validation_data=(X_v_s, y_v),
+                 epochs=15,
+                 batch_size=64,
+                 class_weight=cw,
+                 callbacks=[keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True)],
+                 verbose=0,
+             )
+             val_loss = min(hist.history.get("val_loss", [np.inf]))
+             if val_loss < best_loss:
+                 best_loss = val_loss
+                 best_lb = lb
+
+             del m
+         except Exception:
+             continue
+
+     return best_lb
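A quick round-trip of the cache helpers above (the payload contents and settings are hypothetical; keys are an MD5 over the run settings, so identical settings hit the cache and any changed parameter misses):

    from models.base import make_cache_key, save_cache, load_cache

    key = make_cache_key(last_date="2025-01-02", start_yr=2010, fee_bps=5,
                         epochs=80, split="70/15/15", include_cash=True, lookback=45)
    save_cache(key, {"winner": "Approach 1", "sharpe": 1.23})   # hypothetical payload
    assert load_cache(key) == {"winner": "Approach 1", "sharpe": 1.23}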
models/approach1_wavelet.py CHANGED
@@ -1,14 +1,21 @@
  """
  models/approach1_wavelet.py
  Approach 1: Wavelet Decomposition CNN-LSTM
- With class weights to prevent majority-class collapse.
+ - Dynamic wavelet level based on sequence length (no boundary warnings)
+ - CPU-optimised smaller architecture
+ - Class weights to prevent majority-class collapse
  """

  import numpy as np
  import pywt

  WAVELET = "db4"
- LEVEL = 3
+
+
+ def _safe_wavelet_level(lookback: int, wavelet: str = WAVELET) -> int:
+     """Compute max safe wavelet level for the given sequence length."""
+     max_level = pywt.dwt_max_level(lookback, wavelet)
+     return min(2, max_level)  # cap at 2 to avoid boundary effects


  def _wavelet_decompose_signal(signal: np.ndarray, wavelet: str, level: int) -> np.ndarray:
@@ -16,13 +23,14 @@ def _wavelet_decompose_signal(signal: np.ndarray, wavelet: str, level: int) -> n
      coeffs = pywt.wavedec(signal, wavelet, level=level)
      bands = []
      for c in coeffs:
-         band = np.interp(np.linspace(0, len(c)-1, T), np.arange(len(c)), c)
+         band = np.interp(np.linspace(0, len(c) - 1, T), np.arange(len(c)), c)
          bands.append(band)
      return np.stack(bands, axis=-1)


- def apply_wavelet_transform(X: np.ndarray, wavelet=WAVELET, level=LEVEL) -> np.ndarray:
+ def apply_wavelet_transform(X: np.ndarray, wavelet: str = WAVELET) -> np.ndarray:
      n_samples, lookback, n_features = X.shape
+     level = _safe_wavelet_level(lookback, wavelet)
      n_bands = level + 1
      X_wt = np.zeros((n_samples, lookback, n_features * n_bands), dtype=np.float32)
      for s in range(n_samples):
@@ -33,18 +41,18 @@ def apply_wavelet_transform(X: np.ndarray, wavelet: str = WAVELET) -> np.ndarray
      return X_wt


- def build_wavelet_cnn_lstm(input_shape, n_classes, dropout=0.3, lstm_units=128):
+ def build_wavelet_cnn_lstm(input_shape, n_classes, dropout=0.3, lstm_units=64):
      from tensorflow import keras
      from models.base import classification_head

      inputs = keras.Input(shape=input_shape)
-     x = keras.layers.Conv1D(64, 3, padding="causal", activation="relu")(inputs)
+     x = keras.layers.Conv1D(32, 3, padding="causal", activation="relu")(inputs)
      x = keras.layers.BatchNormalization()(x)
      x = keras.layers.MaxPooling1D(2)(x)
-     x = keras.layers.Conv1D(32, 3, padding="causal", activation="relu")(x)
+     x = keras.layers.Conv1D(16, 3, padding="causal", activation="relu")(x)
      x = keras.layers.BatchNormalization()(x)
      x = keras.layers.Dropout(dropout)(x)
-     x = keras.layers.LSTM(lstm_units, dropout=dropout, recurrent_dropout=0.1)(x)
+     x = keras.layers.LSTM(lstm_units, dropout=dropout)(x)
      outputs = classification_head(x, n_classes, dropout)

      model = keras.Model(inputs, outputs, name="Approach1_Wavelet")
@@ -58,7 +66,7 @@ def build_wavelet_cnn_lstm(input_shape, n_classes, dropout=0.3, lstm_units=64):

  def train_approach1(
      X_train, y_train, X_val, y_val,
-     n_classes, epochs=100, batch_size=32, dropout=0.3, lstm_units=128,
+     n_classes, epochs=80, batch_size=64, dropout=0.3, lstm_units=64,
  ):
      from models.base import get_callbacks, compute_class_weights
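For intuition on _safe_wavelet_level: pywt.dwt_max_level returns the deepest decomposition whose filters still fit the signal, floor(log2(n / (filter_len - 1))) with filter length 8 for db4. A quick check across the app's lookback candidates:

    import pywt

    for lookback in (30, 45, 60):
        max_lvl = pywt.dwt_max_level(lookback, "db4")  # 2, 2, 3
        print(lookback, max_lvl, min(2, max_lvl))      # the min(2, ...) cap keeps level <= 2
    # Level <= 2 means at most level + 1 = 3 bands per feature after apply_wavelet_transform.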