Commit 1b670cd · 1 parent: a84f653 · committed by GitHub Actions

Sync from GitHub: 39a55a42ff2cf3284376a3ecac22623e752d9e78

Files changed (50)
  1. hf_space/hf_space/hf_space/hf_space/app.py +88 -49
  2. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py +31 -39
  3. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/data/loader.py +119 -88
  4. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py +51 -62
  5. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/data/loader.py +101 -108
  6. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py +68 -51
  7. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/data/loader.py +96 -76
  8. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/data/data/__init__.py +1 -0
  9. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md +12 -0
  10. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/data/__init__.py +1 -0
  11. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/data/loader.py +215 -0
  12. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md +110 -14
  13. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py +273 -0
  14. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitattributes +35 -0
  15. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/Dockerfile +20 -0
  16. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md +19 -0
  17. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/requirements.txt +3 -0
  18. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/src/streamlit_app.py +40 -0
  19. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/requirements.txt +29 -3
  20. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/base.py +199 -0
  21. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach1_wavelet.py +167 -0
  22. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach2_regime.py +1 -0
  23. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach3_multiscale.py +150 -0
  24. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/strategy/backtest.py +193 -0
  25. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/signals/conviction.py +93 -0
  26. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/ui/components.py +229 -0
  27. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/ui/charts.py +144 -0
  28. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/utils/calendar.py +91 -0
  29. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/__init__.py +1 -0
  30. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/signals/__init__.py +1 -0
  31. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/strategy/__init__.py +1 -0
  32. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/ui/__init__.py +1 -0
  33. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/utils/__init__.py +1 -0
  34. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/models/__init__.py +1 -0
  35. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/signals/__init__.py +1 -1
  36. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/strategy/__init__.py +1 -1
  37. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/signals/__init__.py +1 -1
  38. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/ui/__init__.py +1 -1
  39. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/utils/__init__.py +1 -1
  40. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach2_regime.py +216 -0
  41. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/base.py +67 -93
  42. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach1_wavelet.py +28 -109
  43. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach2_regime.py +4 -1
  44. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach3_multiscale.py +31 -101
  45. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/ui/components.py +43 -0
  46. hf_space/hf_space/hf_space/hf_space/hf_space/strategy/backtest.py +2 -0
  47. hf_space/hf_space/hf_space/ui/components.py +108 -153
  48. hf_space/hf_space/ui/charts.py +31 -83
  49. hf_space/models/base.py +108 -81
  50. models/approach1_wavelet.py +17 -9
hf_space/hf_space/hf_space/hf_space/app.py CHANGED
@@ -13,7 +13,9 @@ from data.loader import (load_dataset, check_data_freshness,
                          get_features_and_targets, dataset_summary)
 from utils.calendar import get_est_time, get_next_signal_date
 from models.base import (build_sequences, train_val_test_split,
-                         scale_features, returns_to_labels)
+                         scale_features, returns_to_labels,
+                         find_best_lookback, make_cache_key,
+                         save_cache, load_cache)
 from models.approach1_wavelet import train_approach1, predict_approach1
 from models.approach2_regime import train_approach2, predict_approach2
 from models.approach3_multiscale import train_approach3, predict_approach3
@@ -39,8 +41,7 @@ with st.sidebar:
 
     start_yr = st.slider("📅 Start Year", 2010, 2024, 2016)
     fee_bps = st.slider("💰 Fee (bps)", 0, 50, 10)
-    lookback = st.slider("📐 Lookback (days)", 20, 60, 30, step=5)
-    epochs = st.number_input("🔁 Max Epochs", 20, 300, 100, step=10)
+    epochs = st.number_input("🔁 Max Epochs", 20, 150, 80, step=10)
 
     st.divider()
     split_option = st.selectbox("📊 Train/Val/Test Split", ["70/15/15", "80/10/10"], index=0)
@@ -109,7 +110,7 @@ st.info(
     f"**T-bill:** {tbill_rate*100:.2f}%"
 )
 
-# ── Build sequences ───────────────────────────────────────────────────────────
+# ── Prepare raw arrays ────────────────────────────────────────────────────────
 X_raw = df[input_features].values.astype(np.float32)
 y_raw = df[target_etfs].values.astype(np.float32)
 
@@ -117,39 +118,74 @@ for j in range(X_raw.shape[1]):
     mask = np.isnan(X_raw[:, j])
     if mask.any():
         X_raw[mask, j] = np.nanmean(X_raw[:, j])
-
 for j in range(y_raw.shape[1]):
     mask = np.isnan(y_raw[:, j])
     if mask.any():
         y_raw[mask, j] = np.nanmean(y_raw[:, j])
 
-X_seq, y_seq = build_sequences(X_raw, y_raw, lookback)
-y_labels = returns_to_labels(y_seq, include_cash=include_cash)
-
-(X_train, y_train_r, X_val, y_val_r,
- X_test, y_test_r) = train_val_test_split(X_seq, y_seq, train_pct, val_pct)
-(_, y_train_l, _, y_val_l,
- _, _) = train_val_test_split(X_seq, y_labels, train_pct, val_pct)
-
-X_train_s, X_val_s, X_test_s, _ = scale_features(X_train, X_val, X_test)
-
-train_size = len(X_train)
-val_size = len(X_val)
-test_start = lookback + train_size + val_size
-test_dates = df.index[test_start: test_start + len(X_test)]
-test_slice = slice(test_start, test_start + len(X_test))
-
-st.success(f"✅ Sequences — Train: {train_size:,} · Val: {val_size:,} · Test: {len(X_test):,}")
-
-# ── Train all three approaches ────────────────────────────────────────────────
-results = {}
-trained_info = {}
-progress = st.progress(0, text="Starting training...")
-
-# Approach 1
-with st.spinner("🌊 Training Approach 1 — Wavelet CNN-LSTM..."):
+# ── Auto-select optimal lookback ──────────────────────────────────────────────
+last_date_str = str(freshness.get("last_date_in_data", "unknown"))
+
+# Check cache for lookback selection too
+lb_cache_key = make_cache_key(
+    last_date_str, start_yr, fee_bps, int(epochs), split_option, include_cash, 0
+)
+lb_cached = load_cache(f"lb_{lb_cache_key}")
+
+if lb_cached is not None:
+    optimal_lookback = lb_cached["optimal_lookback"]
+    st.success(f"⚡ Loaded from cache · Optimal lookback: **{optimal_lookback}d**")
+else:
+    with st.spinner("🔍 Finding optimal lookback (30 / 45 / 60d)..."):
+        def _y_labels_fn(y_seq):
+            return returns_to_labels(y_seq, include_cash=include_cash)
+        optimal_lookback = find_best_lookback(
+            X_raw, y_raw, _y_labels_fn,
+            train_pct, val_pct, n_classes, include_cash,
+            candidates=[30, 45, 60],
+        )
+    save_cache(f"lb_{lb_cache_key}", {"optimal_lookback": optimal_lookback})
+    st.success(f"📐 Optimal lookback: **{optimal_lookback}d** (auto-selected from 30/45/60)")
+
+lookback = optimal_lookback
+
+# ── Check full model cache ────────────────────────────────────────────────────
+cache_key = make_cache_key(last_date_str, start_yr, fee_bps, int(epochs),
+                           split_option, include_cash, lookback)
+cached_data = load_cache(cache_key)
+from_cache = cached_data is not None
+
+if from_cache:
+    results = cached_data["results"]
+    trained_info = cached_data["trained_info"]
+    test_dates = pd.DatetimeIndex(cached_data["test_dates"])
+    test_slice = cached_data["test_slice"]
+    st.success("⚡ Results loaded from cache — no retraining needed.")
+else:
+    # ── Build sequences ───────────────────────────────────────────────────────
+    X_seq, y_seq = build_sequences(X_raw, y_raw, lookback)
+    y_labels = returns_to_labels(y_seq, include_cash=include_cash)
+
+    (X_train, y_train_r, X_val, y_val_r,
+     X_test, y_test_r) = train_val_test_split(X_seq, y_seq, train_pct, val_pct)
+    (_, y_train_l, _, y_val_l,
+     _, _) = train_val_test_split(X_seq, y_labels, train_pct, val_pct)
+
+    X_train_s, X_val_s, X_test_s, _ = scale_features(X_train, X_val, X_test)
+
+    train_size = len(X_train)
+    val_size = len(X_val)
+    test_start = lookback + train_size + val_size
+    test_dates = df.index[test_start: test_start + len(X_test)]
+    test_slice = slice(test_start, test_start + len(X_test))
+
+    results = {}
+    trained_info = {}
+    progress = st.progress(0, text="Training Approach 1...")
+
+    # ── Approach 1 ────────────────────────────────────────────────────────────
     try:
-        model1, hist1, _ = train_approach1(
+        model1, _, _ = train_approach1(
             X_train_s, y_train_l, X_val_s, y_val_l,
             n_classes=n_classes, epochs=int(epochs),
         )
@@ -159,17 +195,15 @@ with st.spinner("🌊 Training Approach 1 — Wavelet CNN-LSTM..."):
             target_etfs, fee_bps, tbill_rate, include_cash,
         )
         trained_info["Approach 1"] = {"proba": proba1}
-        st.success("✅ Approach 1 complete")
     except Exception as e:
         st.warning(f"⚠️ Approach 1 failed: {e}")
         results["Approach 1"] = None
 
-progress.progress(33, text="Approach 1 done...")
+    progress.progress(33, text="Training Approach 2...")
 
-# Approach 2
-with st.spinner("🔀 Training Approach 2 — Regime-Conditioned CNN-LSTM..."):
+    # ── Approach 2 ────────────────────────────────────────────────────────────
     try:
-        model2, hist2, hmm2, regime_cols2 = train_approach2(
+        model2, _, hmm2, regime_cols2 = train_approach2(
             X_train_s, y_train_l, X_val_s, y_val_l,
             X_flat_all=X_raw, feature_names=input_features,
             lookback=lookback, train_size=train_size, val_size=val_size,
@@ -184,17 +218,15 @@ with st.spinner("🔀 Training Approach 2 — Regime-Conditioned CNN-LSTM..."):
             target_etfs, fee_bps, tbill_rate, include_cash,
         )
         trained_info["Approach 2"] = {"proba": proba2}
-        st.success("✅ Approach 2 complete")
     except Exception as e:
         st.warning(f"⚠️ Approach 2 failed: {e}")
         results["Approach 2"] = None
 
-progress.progress(66, text="Approach 2 done...")
+    progress.progress(66, text="Training Approach 3...")
 
-# Approach 3
-with st.spinner("📡 Training Approach 3 — Multi-Scale CNN-LSTM..."):
+    # ── Approach 3 ────────────────────────────────────────────────────────────
     try:
-        model3, hist3 = train_approach3(
+        model3, _ = train_approach3(
             X_train_s, y_train_l, X_val_s, y_val_l,
             n_classes=n_classes, epochs=int(epochs),
         )
@@ -204,13 +236,20 @@ with st.spinner("📡 Training Approach 3 — Multi-Scale CNN-LSTM..."):
             target_etfs, fee_bps, tbill_rate, include_cash,
         )
         trained_info["Approach 3"] = {"proba": proba3}
-        st.success("✅ Approach 3 complete")
     except Exception as e:
         st.warning(f"⚠️ Approach 3 failed: {e}")
         results["Approach 3"] = None
 
-progress.progress(100, text="All approaches complete!")
-progress.empty()
+    progress.progress(100, text="Done!")
+    progress.empty()
+
+    # ── Save to cache ─────────────────────────────────────────────────────────
+    save_cache(cache_key, {
+        "results": results,
+        "trained_info": trained_info,
+        "test_dates": list(test_dates),
+        "test_slice": test_slice,
+    })
 
 # ── Select winner ─────────────────────────────────────────────────────────────
 winner_name = select_winner(results)
@@ -226,14 +265,14 @@ st.divider()
 # ── Winner signal banner ──────────────────────────────────────────────────────
 show_signal_banner(winner_res["next_signal"], next_date, winner_name)
 
-# ── Conviction panel (winner only) ────────────────────────────────────────────
+# ── Conviction panel ──────────────────────────────────────────────────────────
 winner_proba = trained_info[winner_name]["proba"]
 conviction = compute_conviction(winner_proba[-1], target_etfs, include_cash)
 show_conviction_panel(conviction)
 
 st.divider()
 
-# ── All models' next day signals ──────────────────────────────────────────────
+# ── All models next day signals ───────────────────────────────────────────────
 all_signals = {
     name: {
         "signal": res["next_signal"],
@@ -242,7 +281,7 @@ all_signals = {
     }
     for name, res in results.items() if res is not None
 }
-show_all_signals_panel(all_signals, target_etfs, include_cash, next_date)
+show_all_signals_panel(all_signals, target_etfs, include_cash, next_date, optimal_lookback)
 
 st.divider()
 
@@ -259,13 +298,13 @@ show_comparison_table(comparison_df)
 
 st.divider()
 
-# ── Equity curves ─────────────────────────────────────────────────────────────
-st.subheader("📈 Out-of-Sample Equity Curves — All Approaches vs Benchmarks")
+# ── Equity curve ──────────────────────────────────────────────────────────────
+st.subheader(f"📈 {winner_name} vs SPY & AGG Out-of-Sample")
 fig = equity_curve_chart(results, winner_name, test_dates, df, test_slice, tbill_rate)
 st.plotly_chart(fig, use_container_width=True)
 
 st.divider()
 
-# ── Audit trail (winner) ──────────────────────────────────────────────────────
+# ── Audit trail ───────────────────────────────────────────────────────────────
 st.subheader(f"📋 Audit Trail — {winner_name} (Last 20 Trading Days)")
 show_audit_trail(winner_res["audit_trail"])
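Note for reviewers: the four new helpers imported from models.base (make_cache_key, save_cache, load_cache, find_best_lookback) are not part of this file's diff, so only their call sites are visible. A minimal sketch of what the cache trio plausibly looks like, inferred purely from those call sites — the CACHE_DIR location, the MD5 hashing, and pickle as the serializer are assumptions, not the committed implementation:

import hashlib
import pickle
from pathlib import Path

CACHE_DIR = Path("/tmp/model_cache")  # assumed location, not confirmed by the diff

def make_cache_key(*parts) -> str:
    # Hash the run parameters (last data date, start year, fee, epochs,
    # split, cash flag, lookback) into a stable, filename-safe key.
    blob = "|".join(str(p) for p in parts).encode()
    return hashlib.md5(blob).hexdigest()

def save_cache(key: str, payload: dict) -> None:
    # Persist an arbitrary payload dict under the key.
    CACHE_DIR.mkdir(parents=True, exist_ok=True)
    with open(CACHE_DIR / f"{key}.pkl", "wb") as f:
        pickle.dump(payload, f)

def load_cache(key: str):
    # Return the cached payload, or None on any miss or read error.
    path = CACHE_DIR / f"{key}.pkl"
    if not path.exists():
        return None
    try:
        with open(path, "rb") as f:
            return pickle.load(f)
    except Exception:
        return None

Because last_date_str is part of every key, the cache invalidates itself whenever the dataset gains a new trading day. find_best_lookback presumably trains or scores a cheap proxy model per candidate window (30/45/60) and returns the best scorer, but its body is likewise not shown in this commit.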
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py CHANGED
@@ -11,7 +11,7 @@ import numpy as np
 
 from data.loader import (load_dataset, check_data_freshness,
                          get_features_and_targets, dataset_summary)
-from utils.calendar import get_est_time, is_sync_window, get_next_signal_date
+from utils.calendar import get_est_time, get_next_signal_date
 from models.base import (build_sequences, train_val_test_split,
                          scale_features, returns_to_labels)
 from models.approach1_wavelet import train_approach1, predict_approach1
@@ -22,8 +22,9 @@ from signals.conviction import compute_conviction
 from ui.components import (
     show_freshness_status, show_signal_banner, show_conviction_panel,
     show_metrics_row, show_comparison_table, show_audit_trail,
+    show_all_signals_panel,
 )
-from ui.charts import equity_curve_chart, comparison_bar_chart
+from ui.charts import equity_curve_chart
 
 st.set_page_config(page_title="P2-ETF-CNN-LSTM", page_icon="🧠", layout="wide")
 
@@ -34,12 +35,8 @@ with st.sidebar:
     st.header("⚙️ Configuration")
     now_est = get_est_time()
     st.write(f"🕒 **EST:** {now_est.strftime('%H:%M:%S')}")
-    if is_sync_window():
-        st.success("✅ Sync Window Active")
-    else:
-        st.info("⏸️ Sync Window Inactive")
-
     st.divider()
+
     start_yr = st.slider("📅 Start Year", 2010, 2024, 2016)
     fee_bps = st.slider("💰 Fee (bps)", 0, 50, 10)
     lookback = st.slider("📐 Lookback (days)", 20, 60, 30, step=5)
@@ -87,9 +84,6 @@ with st.sidebar:
     st.write(f"**Macro:** {', '.join(summary['macro_found'])}")
     st.write(f"**T-bill col:** {'✅' if summary['tbill_found'] else '❌'}")
 
-    with st.expander("🔍 All columns"):
-        st.write(summary["all_cols"])
-
 if not run_button:
     st.info("👈 Configure parameters and click **🚀 Run All 3 Approaches**.")
     st.stop()
@@ -101,7 +95,7 @@ st.write(f"📅 **Data:** {df.index[0].strftime('%Y-%m-%d')} → {df.index[-1].s
 
 # ── Features & targets ────────────────────────────────────────────────────────
 try:
-    input_features, target_etfs, tbill_rate, df, col_info = get_features_and_targets(df)
+    input_features, target_etfs, tbill_rate, df, _ = get_features_and_targets(df)
 except ValueError as e:
     st.error(str(e))
     st.stop()
@@ -109,18 +103,6 @@ except ValueError as e:
 n_etfs = len(target_etfs)
 n_classes = n_etfs + (1 if include_cash else 0)
 
-# ── Show column detection diagnostics ─────────────────────────────────────────
-with st.expander("🔬 Column detection diagnostics", expanded=False):
-    st.write("**How each ETF column was interpreted:**")
-    for col, info in col_info.items():
-        st.write(f"- `{col}`: {info}")
-    st.write(f"**Input features ({len(input_features)}):** {input_features}")
-    st.write(f"**T-bill rate used:** {tbill_rate*100:.3f}%")
-
-    # Show sample return values to verify correctness
-    st.write("**Sample target return values (last 3 rows):**")
-    st.dataframe(df[target_etfs].tail(3))
-
 st.info(
     f"🎯 **Targets:** {', '.join([t.replace('_Ret','') for t in target_etfs])} · "
     f"**Features:** {len(input_features)} signals · "
@@ -131,19 +113,15 @@ st.info(
 X_raw = df[input_features].values.astype(np.float32)
 y_raw = df[target_etfs].values.astype(np.float32)
 
-# Fill NaNs
-col_means = np.nanmean(X_raw, axis=0)
 for j in range(X_raw.shape[1]):
     mask = np.isnan(X_raw[:, j])
     if mask.any():
-        X_raw[mask, j] = col_means[j]
+        X_raw[mask, j] = np.nanmean(X_raw[:, j])
 
-# Also fill NaNs in y_raw
-y_means = np.nanmean(y_raw, axis=0)
 for j in range(y_raw.shape[1]):
     mask = np.isnan(y_raw[:, j])
     if mask.any():
-        y_raw[mask, j] = y_means[j]
+        y_raw[mask, j] = np.nanmean(y_raw[:, j])
 
 X_seq, y_seq = build_sequences(X_raw, y_raw, lookback)
 y_labels = returns_to_labels(y_seq, include_cash=include_cash)
@@ -151,7 +129,7 @@ y_labels = returns_to_labels(y_seq, include_cash=include_cash)
 (X_train, y_train_r, X_val, y_val_r,
  X_test, y_test_r) = train_val_test_split(X_seq, y_seq, train_pct, val_pct)
 (_, y_train_l, _, y_val_l,
- _, y_test_l) = train_val_test_split(X_seq, y_labels, train_pct, val_pct)
+ _, _) = train_val_test_split(X_seq, y_labels, train_pct, val_pct)
 
 X_train_s, X_val_s, X_test_s, _ = scale_features(X_train, X_val, X_test)
 
@@ -163,14 +141,6 @@ test_slice = slice(test_start, test_start + len(X_test))
 
 st.success(f"✅ Sequences — Train: {train_size:,} · Val: {val_size:,} · Test: {len(X_test):,}")
 
-# Show class distribution to check for degenerate labels
-with st.expander("🔬 Label distribution (train set)", expanded=False):
-    unique, counts = np.unique(y_train_l, return_counts=True)
-    label_names = [target_etfs[i].replace("_Ret","") if i < n_etfs else "CASH" for i in unique]
-    dist_df = pd.DataFrame({"Class": label_names, "Count": counts,
-                            "Pct": (counts / counts.sum() * 100).round(1)})
-    st.dataframe(dist_df)
-
 # ── Train all three approaches ────────────────────────────────────────────────
 results = {}
 trained_info = {}
@@ -253,27 +223,49 @@ if winner_res is None:
 next_date = get_next_signal_date()
 st.divider()
 
+# ── Winner signal banner ──────────────────────────────────────────────────────
 show_signal_banner(winner_res["next_signal"], next_date, winner_name)
 
+# ── Conviction panel (winner only) ────────────────────────────────────────────
 winner_proba = trained_info[winner_name]["proba"]
 conviction = compute_conviction(winner_proba[-1], target_etfs, include_cash)
 show_conviction_panel(conviction)
 
 st.divider()
+
+# ── All models' next day signals ──────────────────────────────────────────────
+all_signals = {
+    name: {
+        "signal": res["next_signal"],
+        "proba": trained_info[name]["proba"][-1],
+        "is_winner": name == winner_name,
+    }
+    for name, res in results.items() if res is not None
+}
+show_all_signals_panel(all_signals, target_etfs, include_cash, next_date)
+
+st.divider()
+
+# ── Winner performance metrics ────────────────────────────────────────────────
 st.subheader(f"📊 {winner_name} — Performance Metrics")
 show_metrics_row(winner_res, tbill_rate)
 
 st.divider()
+
+# ── Comparison table ──────────────────────────────────────────────────────────
 st.subheader("🏆 Approach Comparison (Winner = Highest Raw Annualised Return)")
 comparison_df = build_comparison_table(results, winner_name)
 show_comparison_table(comparison_df)
-st.plotly_chart(comparison_bar_chart(results, winner_name), use_container_width=True)
 
 st.divider()
+
+# ── Equity curves ─────────────────────────────────────────────────────────────
 st.subheader("📈 Out-of-Sample Equity Curves — All Approaches vs Benchmarks")
 fig = equity_curve_chart(results, winner_name, test_dates, df, test_slice, tbill_rate)
 st.plotly_chart(fig, use_container_width=True)
 
 st.divider()
+
+# ── Audit trail (winner) ──────────────────────────────────────────────────────
 st.subheader(f"📋 Audit Trail — {winner_name} (Last 20 Trading Days)")
 show_audit_trail(winner_res["audit_trail"])
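build_sequences and returns_to_labels are used unchanged in this file and their bodies are not in the commit. A plausible sketch inferred from the call sites and from the indexing in app.py (test_start = lookback + train_size + val_size implies that sample i covers days [i, i+lookback) and targets day i+lookback); the CASH rule below is a guess, loudly so:

import numpy as np

def build_sequences(X, y, lookback):
    # Sample i is the window X[i : i+lookback] paired with the next day's
    # returns y[i+lookback] — consistent with test_start = lookback + train + val.
    xs = [X[i:i + lookback] for i in range(len(X) - lookback)]
    ys = [y[i + lookback] for i in range(len(X) - lookback)]
    return np.asarray(xs, np.float32), np.asarray(ys, np.float32)

def returns_to_labels(y_seq, include_cash=True):
    # Class label = index of the ETF with the highest next-day return.
    # Assumed CASH rule: if every ETF return is negative, use the extra
    # CASH class (index n_etfs) — consistent with n_classes = n_etfs + 1.
    labels = y_seq.argmax(axis=1)
    if include_cash:
        cash_class = y_seq.shape[1]
        labels = np.where((y_seq < 0).all(axis=1), cash_class, labels)
    return labels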
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/data/loader.py CHANGED
@@ -1,12 +1,8 @@
 """
 data/loader.py
 Loads master_data.parquet from HF Dataset.
-Validates freshness against the last NYSE trading day.
-No external pings — all data comes from HF Dataset only.
-
-Actual dataset columns (confirmed from parquet inspection):
-  ETFs  : AGG, GLD, SLV, SPY, TBT, TLT, VNQ
-  Macro : VIX, DXY, T10Y2Y, TBILL_3M, IG_SPREAD, HY_SPREAD
+Engineers rich feature set from raw price/macro columns.
+No external pings — all data from HF Dataset only.
 """
 
 import pandas as pd
@@ -22,9 +18,8 @@ try:
 except ImportError:
     NYSE_CAL_AVAILABLE = False
 
-DATASET_REPO = "P2SAMAPA/fi-etf-macro-signal-master-data"
-PARQUET_FILE = "master_data.parquet"
-
+DATASET_REPO = "P2SAMAPA/fi-etf-macro-signal-master-data"
+PARQUET_FILE = "master_data.parquet"
 TARGET_ETF_COLS = ["TLT", "TBT", "VNQ", "SLV", "GLD"]
 BENCHMARK_COLS = ["SPY", "AGG"]
 TBILL_COL = "TBILL_3M"
@@ -64,16 +59,13 @@ def load_dataset(hf_token: str) -> pd.DataFrame:
             token=hf_token,
         )
         df = pd.read_parquet(path)
-
         if not isinstance(df.index, pd.DatetimeIndex):
             for col in ["Date", "date", "DATE"]:
                 if col in df.columns:
                     df = df.set_index(col)
                     break
             df.index = pd.to_datetime(df.index)
-
         return df.sort_index()
-
     except Exception as e:
         st.error(f"❌ Failed to load dataset: {e}")
         return pd.DataFrame()
@@ -84,11 +76,9 @@ def load_dataset(hf_token: str) -> pd.DataFrame:
 def check_data_freshness(df: pd.DataFrame) -> dict:
     if df.empty:
         return {"fresh": False, "message": "Dataset is empty."}
-
     last = df.index[-1].date()
     expect = get_last_nyse_trading_day()
     fresh = last >= expect
-
     msg = (
         f"✅ Dataset up to date through **{last}**." if fresh else
         f"⚠️ **{expect}** data not yet updated. Latest: **{last}**. "
@@ -98,106 +88,139 @@ def check_data_freshness(df: pd.DataFrame) -> dict:
             "expected_date": expect, "message": msg}
 
 
-# ── Detect whether a column holds prices or returns ───────────────────────────
+# ── Price returns ─────────────────────────────────────────────────────────────
 
-def _is_price_series(series: pd.Series) -> bool:
-    """
-    Heuristic: a price series has abs(median) > 2 and std/mean < 0.5.
-    A return series has abs(median) < 0.1 and many values near zero.
-    """
+def _to_returns(series: pd.Series) -> pd.Series:
+    """Convert price series to daily pct returns. If already returns, pass through."""
     clean = series.dropna()
     if len(clean) == 0:
-        return False
-    med = abs(clean.median())
-    # Strong price signal: median > 2 (e.g. TLT ~ 90, TBT ~ 20)
-    if med > 2:
-        return True
-    # Strong return signal: most values between -0.2 and 0.2
-    if (clean.abs() < 0.2).mean() > 0.9:
-        return False
-    return med > 0.5
+        return series
+    if abs(clean.median()) > 2:  # price series
+        return series.pct_change()
+    return series  # already returns
+
+
+# ── Feature engineering ───────────────────────────────────────────────────────
+
+def _engineer_features(df: pd.DataFrame, ret_cols: list) -> pd.DataFrame:
+    """
+    Build a rich feature set from raw macro + ETF return columns.
+
+    Features added per ETF return:
+      - 1d, 5d, 21d lagged returns
+      - 5d, 21d rolling volatility
+      - 5d, 21d momentum (cumulative return)
+
+    Features added per macro column:
+      - raw value (z-scored over rolling 252d window)
+      - 5d change
+      - 1d lag
+
+    Also adds:
+      - TBILL_3M as a feature (rate level)
+      - VIX regime flag (VIX > 25)
+      - Yield curve slope (already T10Y2Y)
+      - Cross-asset momentum: spread between TLT_ret and TBT_ret
+    """
+    feat = pd.DataFrame(index=df.index)
+
+    # ── ETF return features ───────────────────────────────────────────────────
+    for col in ret_cols:
+        r = df[col]
+        feat[f"{col}_lag1"] = r.shift(1)
+        feat[f"{col}_lag5"] = r.shift(5)
+        feat[f"{col}_lag21"] = r.shift(21)
+        feat[f"{col}_vol5"] = r.rolling(5).std()
+        feat[f"{col}_vol21"] = r.rolling(21).std()
+        feat[f"{col}_mom5"] = r.rolling(5).sum()
+        feat[f"{col}_mom21"] = r.rolling(21).sum()
+
+    # ── Macro features ────────────────────────────────────────────────────────
+    for col in MACRO_COLS:
+        if col not in df.columns:
+            continue
+        s = df[col]
+        # Z-score over rolling 252-day window
+        roll_mean = s.rolling(252, min_periods=63).mean()
+        roll_std = s.rolling(252, min_periods=63).std()
+        feat[f"{col}_z"] = (s - roll_mean) / (roll_std + 1e-9)
+        feat[f"{col}_chg5"] = s.diff(5)
+        feat[f"{col}_lag1"] = s.shift(1)
+
+    # ── TBILL level ───────────────────────────────────────────────────────────
+    if TBILL_COL in df.columns:
+        tbill = df[TBILL_COL]
+        feat["TBILL_level"] = tbill
+        feat["TBILL_chg5"] = tbill.diff(5)
+
+    # ── Derived cross-asset signals ───────────────────────────────────────────
+    if "TLT_Ret" in df.columns and "TBT_Ret" in df.columns:
+        feat["TLT_TBT_spread_mom5"] = (
+            df["TLT_Ret"].rolling(5).sum() - df["TBT_Ret"].rolling(5).sum()
+        )
+
+    if "VIX" in df.columns:
+        feat["VIX_regime"] = (df["VIX"] > 25).astype(float)
+        feat["VIX_mom5"] = df["VIX"].diff(5)
+
+    if "T10Y2Y" in df.columns:
+        feat["YC_inverted"] = (df["T10Y2Y"] < 0).astype(float)
 
+    if "IG_SPREAD" in df.columns and "HY_SPREAD" in df.columns:
+        feat["credit_ratio"] = df["HY_SPREAD"] / (df["IG_SPREAD"] + 1e-9)
 
-# ── Feature / target extraction ───────────────────────────────────────────────
+    return feat
+
+
+# ── Main extraction function ──────────────────────────────────────────────────
 
 def get_features_and_targets(df: pd.DataFrame):
     """
-    Build return columns for target ETFs and benchmarks.
-    Auto-detects whether source columns are prices or already returns.
+    Build return columns for target ETFs and engineer a rich feature set.
 
     Returns:
         input_features : list[str]
         target_etfs    : list[str]  e.g. ["TLT_Ret", ...]
         tbill_rate     : float
-        df             : DataFrame with _Ret columns added
-        col_info       : dict of diagnostics for sidebar display
+        df_out         : DataFrame with all columns
+        col_info       : dict of diagnostics
     """
     missing = [c for c in TARGET_ETF_COLS if c not in df.columns]
     if missing:
         raise ValueError(
             f"Missing ETF columns: {missing}. "
-            f"Found in dataset: {list(df.columns)}"
+            f"Found: {list(df.columns)}"
         )
 
     col_info = {}
 
-    # ── Build _Ret columns ────────────────────────────────────────────────────
-    def make_ret(col):
+    # ── Build ETF return columns ──────────────────────────────────────────────
+    target_etfs = []
+    for col in TARGET_ETF_COLS:
         ret_col = f"{col}_Ret"
-        if ret_col in df.columns:
-            col_info[col] = "pre-computed _Ret"
-            return ret_col
-        if _is_price_series(df[col]):
-            df[ret_col] = df[col].pct_change()
-            col_info[col] = f"price→pct_change (median={df[col].median():.2f})"
-        else:
-            df[ret_col] = df[col]
-            col_info[col] = f"used as-is (median={df[col].median():.4f})"
-        return ret_col
-
-    target_etfs = [make_ret(c) for c in TARGET_ETF_COLS]
-    benchmark_rets = [make_ret(c) for c in BENCHMARK_COLS if c in df.columns]
-
-    # Drop NaN rows (first row from pct_change)
+        df[ret_col] = _to_returns(df[col])
+        med = abs(df[col].dropna().median())
+        col_info[col] = f"price→pct_change (median={med:.2f})" if med > 2 else f"used as-is (median={med:.4f})"
+        target_etfs.append(ret_col)
+
+    # ── Build benchmark return columns ────────────────────────────────────────
+    for col in BENCHMARK_COLS:
+        if col in df.columns:
+            df[f"{col}_Ret"] = _to_returns(df[col])
+
+    # ── Drop NaN from first pct_change row ────────────────────────────────────
     df = df.dropna(subset=target_etfs).copy()
 
-    # Sanity check: target returns should be small daily values
-    for ret_col in target_etfs:
-        med = df[ret_col].abs().median()
-        if med > 0.1:
-            st.warning(
-                f"⚠️ {ret_col} has median absolute value {med:.4f} — "
-                f"these may not be daily returns. Check dataset column '{ret_col.replace('_Ret','')}'. "
-                f"Sample values: {df[ret_col].tail(3).values}"
-            )
-
-    # ── Input features ────────────────────────────────────────────────────────
-    exclude = set(
-        TARGET_ETF_COLS + BENCHMARK_COLS + target_etfs + benchmark_rets +
-        [f"{c}_Ret" for c in BENCHMARK_COLS] + [TBILL_COL]
-    )
-
-    # First try known macro columns
-    input_features = [c for c in MACRO_COLS if c in df.columns and c not in exclude]
-
-    # Then add any engineered signal columns
-    extra = [
-        c for c in df.columns
-        if c not in exclude
-        and c not in input_features
-        and any(k in c for k in ["_Z", "_Vol", "Regime", "YC_", "Credit_",
-                                 "Rates_", "VIX_", "Spread", "DXY", "T10Y",
-                                 "TBILL", "SOFR", "MOVE"])
-        and pd.api.types.is_numeric_dtype(df[c])
-    ]
-    input_features += extra
-
-    # Fallback: all numeric non-excluded columns
-    if not input_features:
-        input_features = [
-            c for c in df.columns
-            if c not in exclude and pd.api.types.is_numeric_dtype(df[c])
-        ]
+    # ── Engineer features ─────────────────────────────────────────────────────
+    feat_df = _engineer_features(df, target_etfs)
+
+    # Merge features into df
+    for col in feat_df.columns:
+        df[col] = feat_df[col].values
+
+    # Drop rows with NaN in features (from lags/rolling)
+    feat_cols = list(feat_df.columns)
+    df = df.dropna(subset=feat_cols).copy()
 
     # ── T-bill rate ───────────────────────────────────────────────────────────
     tbill_rate = 0.045
@@ -207,6 +230,14 @@ def get_features_and_targets(df: pd.DataFrame):
         v = float(raw.iloc[-1])
         tbill_rate = v / 100 if v > 1 else v
 
+    # Input features = all engineered feature columns
+    exclude = set(
+        TARGET_ETF_COLS + BENCHMARK_COLS + target_etfs +
+        [f"{c}_Ret" for c in BENCHMARK_COLS] + [TBILL_COL] +
+        list(MACRO_COLS)
+    )
+    input_features = [c for c in feat_cols if c not in exclude]
+
    return input_features, target_etfs, tbill_rate, df, col_info
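The new _to_returns heuristic collapses the old multi-branch _is_price_series into a single median threshold. A toy check — the function body is copied verbatim from the diff above; only the demo data is made up:

import pandas as pd

def _to_returns(series: pd.Series) -> pd.Series:
    clean = series.dropna()
    if len(clean) == 0:
        return series
    if abs(clean.median()) > 2:  # price series
        return series.pct_change()
    return series  # already returns

prices = pd.Series([90.0, 91.0, 89.5, 92.0])     # price-like (median 90.5)
rets = pd.Series([0.002, -0.011, 0.004, 0.001])  # return-like (median 0.0015)
print(_to_returns(prices).round(4).tolist())  # [nan, 0.0111, -0.0165, 0.0279]
print(_to_returns(rets).tolist())             # unchanged: passes through

One consequence worth flagging: the threshold assumes every return column is already in decimal units — a return-like series quoted in percentage points near, say, 5 would also clear the median > 2 bar and get pct_change'd.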
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py CHANGED
@@ -9,7 +9,6 @@ import streamlit as st
 import pandas as pd
 import numpy as np
 
-# ── Module imports ────────────────────────────────────────────────────────────
 from data.loader import (load_dataset, check_data_freshness,
                          get_features_and_targets, dataset_summary)
 from utils.calendar import get_est_time, is_sync_window, get_next_signal_date
@@ -26,20 +25,13 @@ from ui.components import (
 )
 from ui.charts import equity_curve_chart, comparison_bar_chart
 
-# ── Page config ───────────────────────────────────────────────────────────────
-st.set_page_config(
-    page_title="P2-ETF-CNN-LSTM",
-    page_icon="🧠",
-    layout="wide",
-)
 
-# ── Secrets ───────────────────────────────────────────────────────────────────
 HF_TOKEN = os.getenv("HF_TOKEN", "")
 
 # ── Sidebar ───────────────────────────────────────────────────────────────────
 with st.sidebar:
     st.header("⚙️ Configuration")
-
     now_est = get_est_time()
     st.write(f"🕒 **EST:** {now_est.strftime('%H:%M:%S')}")
     if is_sync_window():
@@ -48,25 +40,19 @@ with st.sidebar:
         st.info("⏸️ Sync Window Inactive")
 
     st.divider()
-
     start_yr = st.slider("📅 Start Year", 2010, 2024, 2016)
     fee_bps = st.slider("💰 Fee (bps)", 0, 50, 10)
     lookback = st.slider("📐 Lookback (days)", 20, 60, 30, step=5)
    epochs = st.number_input("🔁 Max Epochs", 20, 300, 100, step=10)
 
     st.divider()
-
     split_option = st.selectbox("📊 Train/Val/Test Split", ["70/15/15", "80/10/10"], index=0)
-    split_map = {"70/15/15": (0.70, 0.15), "80/10/10": (0.80, 0.10)}
-    train_pct, val_pct = split_map[split_option]
 
-    include_cash = st.checkbox(
-        "💵 Include CASH class", value=True,
-        help="Model can select CASH (earns T-bill rate) instead of any ETF",
-    )
 
     st.divider()
-
     run_button = st.button("🚀 Run All 3 Approaches", type="primary", use_container_width=True)
 
 # ── Title ─────────────────────────────────────────────────────────────────────
@@ -74,9 +60,8 @@ st.title("🧠 P2-ETF-CNN-LSTM")
 st.caption("Approach 1: Wavelet · Approach 2: Regime-Conditioned · Approach 3: Multi-Scale Parallel")
 st.caption("Winner selected by highest raw annualised return on out-of-sample test set.")
 
-# ── Token check ───────────────────────────────────────────────────────────────
 if not HF_TOKEN:
-    st.error("❌ HF_TOKEN secret not found. Add it to HF Space / GitHub secrets.")
     st.stop()
 
 # ── Load dataset ──────────────────────────────────────────────────────────────
@@ -86,11 +71,10 @@ with st.spinner("📡 Loading dataset from HuggingFace..."):
 if df_raw.empty:
     st.stop()
 
-# ── Freshness check ───────────────────────────────────────────────────────────
 freshness = check_data_freshness(df_raw)
 show_freshness_status(freshness)
 
-# ── Dataset summary in sidebar ────────────────────────────────────────────────
 with st.sidebar:
     st.divider()
     st.subheader("📦 Dataset Info")
@@ -103,21 +87,21 @@ with st.sidebar:
     st.write(f"**Macro:** {', '.join(summary['macro_found'])}")
     st.write(f"**T-bill col:** {'✅' if summary['tbill_found'] else '❌'}")
 
-# ── Wait for run button ───────────────────────────────────────────────────────
 if not run_button:
-    st.info("👈 Configure parameters in the sidebar and click **🚀 Run All 3 Approaches**.")
     st.stop()
 
 # ── Filter by start year ──────────────────────────────────────────────────────
 df = df_raw[df_raw.index.year >= start_yr].copy()
-st.write(
-    f"📅 **Data:** {df.index[0].strftime('%Y-%m-%d')} → {df.index[-1].strftime('%Y-%m-%d')} "
-    f"({df.index[-1].year - df.index[0].year + 1} years)"
-)
 
 # ── Features & targets ────────────────────────────────────────────────────────
 try:
-    input_features, target_etfs, tbill_rate, df = get_features_and_targets(df)
 except ValueError as e:
     st.error(str(e))
     st.stop()
@@ -125,6 +109,18 @@ except ValueError as e:
 n_etfs = len(target_etfs)
 n_classes = n_etfs + (1 if include_cash else 0)
 
 st.info(
     f"🎯 **Targets:** {', '.join([t.replace('_Ret','') for t in target_etfs])} · "
     f"**Features:** {len(input_features)} signals · "
@@ -135,13 +131,20 @@ st.info(
 X_raw = df[input_features].values.astype(np.float32)
 y_raw = df[target_etfs].values.astype(np.float32)
 
-# Fill any remaining NaNs with column means
 col_means = np.nanmean(X_raw, axis=0)
 for j in range(X_raw.shape[1]):
     mask = np.isnan(X_raw[:, j])
     if mask.any():
         X_raw[mask, j] = col_means[j]
 
 X_seq, y_seq = build_sequences(X_raw, y_raw, lookback)
 y_labels = returns_to_labels(y_seq, include_cash=include_cash)
@@ -154,27 +157,30 @@ X_train_s, X_val_s, X_test_s, _ = scale_features(X_train, X_val, X_test)
 
 train_size = len(X_train)
 val_size = len(X_val)
-
 test_start = lookback + train_size + val_size
 test_dates = df.index[test_start: test_start + len(X_test)]
 test_slice = slice(test_start, test_start + len(X_test))
 
-st.success(
-    f"✅ Sequences — Train: {train_size:,} · Val: {val_size:,} · Test: {len(X_test):,}"
-)
 
 # ── Train all three approaches ────────────────────────────────────────────────
 results = {}
 trained_info = {}
 
-progress = st.progress(0, text="Starting training...")
-
-# ── Approach 1 ────────────────────────────────────────────────────────────────
 with st.spinner("🌊 Training Approach 1 — Wavelet CNN-LSTM..."):
     try:
         model1, hist1, _ = train_approach1(
-            X_train_s, y_train_l,
-            X_val_s, y_val_l,
             n_classes=n_classes, epochs=int(epochs),
         )
         preds1, proba1 = predict_approach1(model1, X_test_s)
@@ -190,17 +196,13 @@ with st.spinner("🌊 Training Approach 1 — Wavelet CNN-LSTM..."):
 
 progress.progress(33, text="Approach 1 done...")
 
-# ── Approach 2 ────────────────────────────────────────────────────────────────
 with st.spinner("🔀 Training Approach 2 — Regime-Conditioned CNN-LSTM..."):
     try:
         model2, hist2, hmm2, regime_cols2 = train_approach2(
-            X_train_s, y_train_l,
-            X_val_s, y_val_l,
-            X_flat_all=X_raw,
-            feature_names=input_features,
-            lookback=lookback,
-            train_size=train_size,
-            val_size=val_size,
             n_classes=n_classes, epochs=int(epochs),
         )
         preds2, proba2 = predict_approach2(
@@ -219,12 +221,11 @@ with st.spinner("🔀 Training Approach 2 — Regime-Conditioned CNN-LSTM..."):
 
 progress.progress(66, text="Approach 2 done...")
 
-# ── Approach 3 ────────────────────────────────────────────────────────────────
 with st.spinner("📡 Training Approach 3 — Multi-Scale CNN-LSTM..."):
     try:
         model3, hist3 = train_approach3(
-            X_train_s, y_train_l,
-            X_val_s, y_val_l,
             n_classes=n_classes, epochs=int(epochs),
         )
         preds3, proba3 = predict_approach3(model3, X_test_s)
@@ -250,41 +251,29 @@ if winner_res is None:
     st.stop()
 
 next_date = get_next_signal_date()
-
 st.divider()
 
-# ── Signal banner ─────────────────────────────────────────────────────────────
 show_signal_banner(winner_res["next_signal"], next_date, winner_name)
 
-# ── Conviction panel ──────────────────────────────────────────────────────────
 winner_proba = trained_info[winner_name]["proba"]
 conviction = compute_conviction(winner_proba[-1], target_etfs, include_cash)
 show_conviction_panel(conviction)
 
 st.divider()
-
-# ── Winner metrics ────────────────────────────────────────────────────────────
 st.subheader(f"📊 {winner_name} — Performance Metrics")
 show_metrics_row(winner_res, tbill_rate)
 
 st.divider()
-
-# ── Comparison table ──────────────────────────────────────────────────────────
 st.subheader("🏆 Approach Comparison (Winner = Highest Raw Annualised Return)")
 comparison_df = build_comparison_table(results, winner_name)
 show_comparison_table(comparison_df)
-
 st.plotly_chart(comparison_bar_chart(results, winner_name), use_container_width=True)
 
 st.divider()
-
-# ── Equity curves ─────────────────────────────────────────────────────────────
 st.subheader("📈 Out-of-Sample Equity Curves — All Approaches vs Benchmarks")
 fig = equity_curve_chart(results, winner_name, test_dates, df, test_slice, tbill_rate)
 st.plotly_chart(fig, use_container_width=True)
 
 st.divider()
-
-# ── Audit trail ───────────────────────────────────────────────────────────────
289
  st.subheader(f"📋 Audit Trail — {winner_name} (Last 20 Trading Days)")
290
  show_audit_trail(winner_res["audit_trail"])
 
 import pandas as pd
 import numpy as np

 from data.loader import (load_dataset, check_data_freshness,
                          get_features_and_targets, dataset_summary)
 from utils.calendar import get_est_time, is_sync_window, get_next_signal_date
 )
 from ui.charts import equity_curve_chart, comparison_bar_chart

+st.set_page_config(page_title="P2-ETF-CNN-LSTM", page_icon="🧠", layout="wide")

 HF_TOKEN = os.getenv("HF_TOKEN", "")

 # ── Sidebar ───────────────────────────────────────────────────────────────────
 with st.sidebar:
     st.header("⚙️ Configuration")
     now_est = get_est_time()
     st.write(f"🕒 **EST:** {now_est.strftime('%H:%M:%S')}")
     if is_sync_window():
     st.info("⏸️ Sync Window Inactive")

     st.divider()
     start_yr = st.slider("📅 Start Year", 2010, 2024, 2016)
     fee_bps = st.slider("💰 Fee (bps)", 0, 50, 10)
     lookback = st.slider("📐 Lookback (days)", 20, 60, 30, step=5)
     epochs = st.number_input("🔁 Max Epochs", 20, 300, 100, step=10)

     st.divider()
     split_option = st.selectbox("📊 Train/Val/Test Split", ["70/15/15", "80/10/10"], index=0)
+    train_pct, val_pct = {"70/15/15": (0.70, 0.15), "80/10/10": (0.80, 0.10)}[split_option]

+    include_cash = st.checkbox("💵 Include CASH class", value=True,
+                               help="Model can select CASH (earns T-bill rate) instead of any ETF")

     st.divider()
     run_button = st.button("🚀 Run All 3 Approaches", type="primary", use_container_width=True)

 # ── Title ─────────────────────────────────────────────────────────────────────
 st.caption("Approach 1: Wavelet · Approach 2: Regime-Conditioned · Approach 3: Multi-Scale Parallel")
 st.caption("Winner selected by highest raw annualised return on out-of-sample test set.")

 if not HF_TOKEN:
+    st.error("❌ HF_TOKEN secret not found.")
     st.stop()

 # ── Load dataset ──────────────────────────────────────────────────────────────
 if df_raw.empty:
     st.stop()

 freshness = check_data_freshness(df_raw)
 show_freshness_status(freshness)

+# ── Dataset info sidebar ──────────────────────────────────────────────────────
 with st.sidebar:
     st.divider()
     st.subheader("📦 Dataset Info")
     st.write(f"**Macro:** {', '.join(summary['macro_found'])}")
     st.write(f"**T-bill col:** {'✅' if summary['tbill_found'] else '❌'}")

+    with st.expander("🔍 All columns"):
+        st.write(summary["all_cols"])
+
 if not run_button:
+    st.info("👈 Configure parameters and click **🚀 Run All 3 Approaches**.")
     st.stop()

 # ── Filter by start year ──────────────────────────────────────────────────────
 df = df_raw[df_raw.index.year >= start_yr].copy()
+st.write(f"📅 **Data:** {df.index[0].strftime('%Y-%m-%d')} → {df.index[-1].strftime('%Y-%m-%d')} "
+         f"({df.index[-1].year - df.index[0].year + 1} years)")

 # ── Features & targets ────────────────────────────────────────────────────────
 try:
+    input_features, target_etfs, tbill_rate, df, col_info = get_features_and_targets(df)
 except ValueError as e:
     st.error(str(e))
     st.stop()

 n_etfs = len(target_etfs)
 n_classes = n_etfs + (1 if include_cash else 0)

+# ── Show column detection diagnostics ────────────────────────────────────────
+with st.expander("🔬 Column detection diagnostics", expanded=False):
+    st.write("**How each ETF column was interpreted:**")
+    for col, info in col_info.items():
+        st.write(f"- `{col}`: {info}")
+    st.write(f"**Input features ({len(input_features)}):** {input_features}")
+    st.write(f"**T-bill rate used:** {tbill_rate*100:.3f}%")
+
+    # Show sample return values to verify correctness
+    st.write("**Sample target return values (last 3 rows):**")
+    st.dataframe(df[target_etfs].tail(3))
+
 st.info(
     f"🎯 **Targets:** {', '.join([t.replace('_Ret','') for t in target_etfs])} · "
     f"**Features:** {len(input_features)} signals · "

 X_raw = df[input_features].values.astype(np.float32)
 y_raw = df[target_etfs].values.astype(np.float32)

+# Fill NaNs
 col_means = np.nanmean(X_raw, axis=0)
 for j in range(X_raw.shape[1]):
     mask = np.isnan(X_raw[:, j])
     if mask.any():
         X_raw[mask, j] = col_means[j]

+# Also fill NaNs in y_raw
+y_means = np.nanmean(y_raw, axis=0)
+for j in range(y_raw.shape[1]):
+    mask = np.isnan(y_raw[:, j])
+    if mask.any():
+        y_raw[mask, j] = y_means[j]
+
 X_seq, y_seq = build_sequences(X_raw, y_raw, lookback)
 y_labels = returns_to_labels(y_seq, include_cash=include_cash)

 train_size = len(X_train)
 val_size = len(X_val)
 test_start = lookback + train_size + val_size
 test_dates = df.index[test_start: test_start + len(X_test)]
 test_slice = slice(test_start, test_start + len(X_test))

+st.success(f"✅ Sequences — Train: {train_size:,} · Val: {val_size:,} · Test: {len(X_test):,}")
+
+# Show class distribution to check for degenerate labels
+with st.expander("🔬 Label distribution (train set)", expanded=False):
+    unique, counts = np.unique(y_train_l, return_counts=True)
+    label_names = [target_etfs[i].replace("_Ret","") if i < n_etfs else "CASH" for i in unique]
+    dist_df = pd.DataFrame({"Class": label_names, "Count": counts,
+                            "Pct": (counts / counts.sum() * 100).round(1)})
+    st.dataframe(dist_df)

 # ── Train all three approaches ────────────────────────────────────────────────
 results = {}
 trained_info = {}
+progress = st.progress(0, text="Starting training...")

+# Approach 1
 with st.spinner("🌊 Training Approach 1 — Wavelet CNN-LSTM..."):
     try:
         model1, hist1, _ = train_approach1(
+            X_train_s, y_train_l, X_val_s, y_val_l,
             n_classes=n_classes, epochs=int(epochs),
         )
         preds1, proba1 = predict_approach1(model1, X_test_s)

 progress.progress(33, text="Approach 1 done...")

+# Approach 2
 with st.spinner("🔀 Training Approach 2 — Regime-Conditioned CNN-LSTM..."):
     try:
         model2, hist2, hmm2, regime_cols2 = train_approach2(
+            X_train_s, y_train_l, X_val_s, y_val_l,
+            X_flat_all=X_raw, feature_names=input_features,
+            lookback=lookback, train_size=train_size, val_size=val_size,
             n_classes=n_classes, epochs=int(epochs),
         )
         preds2, proba2 = predict_approach2(

 progress.progress(66, text="Approach 2 done...")

+# Approach 3
 with st.spinner("📡 Training Approach 3 — Multi-Scale CNN-LSTM..."):
     try:
         model3, hist3 = train_approach3(
+            X_train_s, y_train_l, X_val_s, y_val_l,
             n_classes=n_classes, epochs=int(epochs),
         )
         preds3, proba3 = predict_approach3(model3, X_test_s)

     st.stop()

 next_date = get_next_signal_date()
 st.divider()

 show_signal_banner(winner_res["next_signal"], next_date, winner_name)

 winner_proba = trained_info[winner_name]["proba"]
 conviction = compute_conviction(winner_proba[-1], target_etfs, include_cash)
 show_conviction_panel(conviction)

 st.divider()
 st.subheader(f"📊 {winner_name} — Performance Metrics")
 show_metrics_row(winner_res, tbill_rate)

 st.divider()
 st.subheader("🏆 Approach Comparison (Winner = Highest Raw Annualised Return)")
 comparison_df = build_comparison_table(results, winner_name)
 show_comparison_table(comparison_df)
 st.plotly_chart(comparison_bar_chart(results, winner_name), use_container_width=True)

 st.divider()
 st.subheader("📈 Out-of-Sample Equity Curves — All Approaches vs Benchmarks")
 fig = equity_curve_chart(results, winner_name, test_dates, df, test_slice, tbill_rate)
 st.plotly_chart(fig, use_container_width=True)

 st.divider()
 st.subheader(f"📋 Audit Trail — {winner_name} (Last 20 Trading Days)")
 show_audit_trail(winner_res["audit_trail"])
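Note on the label-distribution expander above: `returns_to_labels` lives in `models/base.py`, which is not part of this sync, so the mapping from next-day returns to class ids can only be inferred. The sketch below is a plausible reading consistent with the app's own `label_names` mapping (ETF index for classes below `n_etfs`, CASH at index `n_etfs`); the argmax rule is an assumption, not the confirmed implementation:

```python
import numpy as np

def returns_to_labels(y_seq: np.ndarray, include_cash: bool = True) -> np.ndarray:
    # Assumed rule: label each day with the best-performing ETF's index;
    # if include_cash and every ETF return is negative, use the extra
    # CASH class (index n_etfs), matching the app's label_names mapping.
    labels = y_seq.argmax(axis=1)
    if include_cash:
        all_negative = (y_seq < 0).all(axis=1)
        labels = np.where(all_negative, y_seq.shape[1], labels)
    return labels

y = np.array([[0.01, -0.02], [-0.01, -0.03]])
print(returns_to_labels(y))  # [0 2] → ETF 0 on day 1, CASH on day 2
```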
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/data/loader.py CHANGED
@@ -4,7 +4,7 @@ Loads master_data.parquet from HF Dataset.
 Validates freshness against the last NYSE trading day.
 No external pings — all data comes from HF Dataset only.

-Actual dataset columns (from parquet inspection):
   ETFs  : AGG, GLD, SLV, SPY, TBT, TLT, VNQ
   Macro : VIX, DXY, T10Y2Y, TBILL_3M, IG_SPREAD, HY_SPREAD
 """
@@ -15,7 +15,6 @@ import streamlit as st
 from huggingface_hub import hf_hub_download
 from datetime import datetime, timedelta
 import pytz
-import os

 try:
     import pandas_market_calendars as mcal
@@ -26,33 +25,27 @@ except ImportError:
 DATASET_REPO = "P2SAMAPA/fi-etf-macro-signal-master-data"
 PARQUET_FILE = "master_data.parquet"

-# ── Actual column names in the dataset ───────────────────────────────────────
-TARGET_ETF_COLS = ["TLT", "TBT", "VNQ", "SLV", "GLD"]   # traded ETFs
-BENCHMARK_COLS  = ["SPY", "AGG"]                        # chart only
-TBILL_COL       = "TBILL_3M"                            # 3m T-bill rate
-MACRO_COLS      = ["VIX", "DXY", "T10Y2Y", "IG_SPREAD", "HY_SPREAD"]


-# ── NYSE calendar helpers ─────────────────────────────────────────────────────

 def get_last_nyse_trading_day(as_of=None):
-    """Return the most recent NYSE trading day on or before as_of (default: today EST)."""
     est = pytz.timezone("US/Eastern")
     if as_of is None:
         as_of = datetime.now(est)
     today = as_of.date()
-
     if NYSE_CAL_AVAILABLE:
         try:
             nyse = mcal.get_calendar("NYSE")
-            start = today - timedelta(days=10)
-            sched = nyse.schedule(start_date=start, end_date=today)
             if len(sched) > 0:
                 return sched.index[-1].date()
         except Exception:
             pass
-
-    # Fallback: skip weekends
     candidate = today
     while candidate.weekday() >= 5:
         candidate -= timedelta(days=1)
@@ -63,10 +56,6 @@ def get_last_nyse_trading_day(as_of=None):

 @st.cache_data(ttl=3600, show_spinner=False)
 def load_dataset(hf_token: str) -> pd.DataFrame:
-    """
-    Download master_data.parquet from HF Dataset and return as DataFrame.
-    Cached for 1 hour. Index is parsed as DatetimeIndex.
-    """
     try:
         path = hf_hub_download(
             repo_id=DATASET_REPO,
@@ -76,7 +65,6 @@ def load_dataset(hf_token: str) -> pd.DataFrame:
         )
         df = pd.read_parquet(path)

-        # Ensure DatetimeIndex
         if not isinstance(df.index, pd.DatetimeIndex):
             for col in ["Date", "date", "DATE"]:
                 if col in df.columns:
@@ -84,66 +72,66 @@ def load_dataset(hf_token: str) -> pd.DataFrame:
                     break
             df.index = pd.to_datetime(df.index)

-        df = df.sort_index()
-        return df

     except Exception as e:
-        st.error(f"❌ Failed to load dataset from HuggingFace: {e}")
         return pd.DataFrame()


 # ── Freshness check ───────────────────────────────────────────────────────────

 def check_data_freshness(df: pd.DataFrame) -> dict:
-    """
-    Check whether the dataset contains data for the last NYSE trading day.
-    """
     if df.empty:
-        return {
-            "fresh": False,
-            "last_date_in_data": None,
-            "expected_date": None,
-            "message": "Dataset is empty.",
-        }
-
-    last_date_in_data = df.index[-1].date()
-    expected_date = get_last_nyse_trading_day()
-    fresh = last_date_in_data >= expected_date
-
-    if fresh:
-        message = f"✅ Dataset is up to date through **{last_date_in_data}**."
-    else:
-        message = (
-            f"⚠️ **{expected_date}** data not yet updated in dataset. "
-            f"Latest available: **{last_date_in_data}**. "
-            f"Please check back later — the dataset updates daily after market close."
-        )

-    return {
-        "fresh": fresh,
-        "last_date_in_data": last_date_in_data,
-        "expected_date": expected_date,
-        "message": message,
-    }


 # ── Feature / target extraction ───────────────────────────────────────────────

 def get_features_and_targets(df: pd.DataFrame):
     """
-    Extract input feature columns and target ETF return columns.
-
-    The dataset stores raw price or return values directly under ticker names.
-    We compute daily log returns for target ETFs if they are not already returns.

     Returns:
-        input_features : list of column names to use as model inputs
-        target_etfs    : list of ETF column names (after return computation)
-        tbill_rate     : latest 3m T-bill rate as float (annualised, e.g. 0.045)
-        df             : DataFrame (possibly with new _Ret columns added)
     """
-
-    # ── Confirm target ETFs exist ─────────────────────────────────────────────
     missing = [c for c in TARGET_ETF_COLS if c not in df.columns]
     if missing:
         raise ValueError(
@@ -151,71 +139,75 @@ def get_features_and_targets(df: pd.DataFrame):
             f"Found in dataset: {list(df.columns)}"
         )

-    # ── Build return columns ──────────────────────────────────────────────────
-    # If values look like prices (>5), compute pct returns.
-    # If they already look like small returns (<1 in abs), use as-is.
-    target_etfs = []
-    for col in TARGET_ETF_COLS:
-        ret_col = f"{col}_Ret"
-        if ret_col not in df.columns:
-            sample = df[col].dropna()
-            if len(sample) > 0 and abs(sample.median()) > 1:
-                # Looks like price — compute pct change
-                df[ret_col] = df[col].pct_change()
-            else:
-                # Already returns
-                df[ret_col] = df[col]
-        target_etfs.append(ret_col)
-
-    # Same for benchmarks
-    for col in BENCHMARK_COLS:
-        ret_col = f"{col}_Ret"
-        if ret_col not in df.columns and col in df.columns:
-            sample = df[col].dropna()
-            if len(sample) > 0 and abs(sample.median()) > 1:
-                df[ret_col] = df[col].pct_change()
-            else:
-                df[ret_col] = df[col]

-    # Drop rows with NaN in target columns (first row after pct_change)
-    df = df.dropna(subset=target_etfs)

     # ── Input features ────────────────────────────────────────────────────────
-    # Use macro columns directly; exclude ETF price/return cols and benchmarks
     exclude = set(
-        TARGET_ETF_COLS + BENCHMARK_COLS +
-        target_etfs +
-        [f"{c}_Ret" for c in BENCHMARK_COLS] +
-        [TBILL_COL]
     )

-    input_features = [
         c for c in df.columns
         if c not in exclude
-        and c in (MACRO_COLS + [
-            col for col in df.columns
-            if any(k in col for k in ["_Z", "_Vol", "Regime", "YC_", "Credit_",
-                                      "Rates_", "VIX_", "Spread", "DXY", "T10Y"])
-        ])
     ]

-    # Fallback: if none matched, use all non-excluded numeric columns
     if not input_features:
         input_features = [
             c for c in df.columns
-            if c not in exclude
-            and pd.api.types.is_numeric_dtype(df[c])
         ]

     # ── T-bill rate ───────────────────────────────────────────────────────────
-    tbill_rate = 0.045  # default
     if TBILL_COL in df.columns:
         raw = df[TBILL_COL].dropna()
         if len(raw) > 0:
-            last_val = float(raw.iloc[-1])
-            tbill_rate = last_val / 100 if last_val > 1 else last_val

-    return input_features, target_etfs, tbill_rate, df


 # ── Dataset summary ───────────────────────────────────────────────────────────
@@ -228,8 +220,9 @@ def dataset_summary(df: pd.DataFrame) -> dict:
         "columns": len(df.columns),
         "start_date": df.index[0].strftime("%Y-%m-%d"),
         "end_date": df.index[-1].strftime("%Y-%m-%d"),
-        "etfs_found": [c for c in TARGET_ETF_COLS if c in df.columns],
-        "benchmarks": [c for c in BENCHMARK_COLS if c in df.columns],
-        "macro_found": [c for c in MACRO_COLS if c in df.columns],
         "tbill_found": TBILL_COL in df.columns,
     }
 Validates freshness against the last NYSE trading day.
 No external pings — all data comes from HF Dataset only.

+Actual dataset columns (confirmed from parquet inspection):
   ETFs  : AGG, GLD, SLV, SPY, TBT, TLT, VNQ
   Macro : VIX, DXY, T10Y2Y, TBILL_3M, IG_SPREAD, HY_SPREAD
 """

 from huggingface_hub import hf_hub_download
 from datetime import datetime, timedelta
 import pytz

 try:
     import pandas_market_calendars as mcal

 DATASET_REPO = "P2SAMAPA/fi-etf-macro-signal-master-data"
 PARQUET_FILE = "master_data.parquet"

+TARGET_ETF_COLS = ["TLT", "TBT", "VNQ", "SLV", "GLD"]
+BENCHMARK_COLS  = ["SPY", "AGG"]
+TBILL_COL       = "TBILL_3M"
+MACRO_COLS      = ["VIX", "DXY", "T10Y2Y", "IG_SPREAD", "HY_SPREAD"]


+# ── NYSE calendar ─────────────────────────────────────────────────────────────

 def get_last_nyse_trading_day(as_of=None):
     est = pytz.timezone("US/Eastern")
     if as_of is None:
         as_of = datetime.now(est)
     today = as_of.date()
     if NYSE_CAL_AVAILABLE:
         try:
             nyse = mcal.get_calendar("NYSE")
+            sched = nyse.schedule(start_date=today - timedelta(days=10), end_date=today)
             if len(sched) > 0:
                 return sched.index[-1].date()
         except Exception:
             pass
     candidate = today
     while candidate.weekday() >= 5:
         candidate -= timedelta(days=1)

 @st.cache_data(ttl=3600, show_spinner=False)
 def load_dataset(hf_token: str) -> pd.DataFrame:
     try:
         path = hf_hub_download(
             repo_id=DATASET_REPO,
         )
         df = pd.read_parquet(path)

         if not isinstance(df.index, pd.DatetimeIndex):
             for col in ["Date", "date", "DATE"]:
                 if col in df.columns:
                     break
             df.index = pd.to_datetime(df.index)

+        return df.sort_index()

     except Exception as e:
+        st.error(f"❌ Failed to load dataset: {e}")
         return pd.DataFrame()


 # ── Freshness check ───────────────────────────────────────────────────────────

 def check_data_freshness(df: pd.DataFrame) -> dict:
     if df.empty:
+        return {"fresh": False, "message": "Dataset is empty."}

+    last   = df.index[-1].date()
+    expect = get_last_nyse_trading_day()
+    fresh  = last >= expect
+
+    msg = (
+        f"✅ Dataset up to date through **{last}**." if fresh else
+        f"⚠️ **{expect}** data not yet updated. Latest: **{last}**. "
+        f"Dataset updates daily after market close."
+    )
+    return {"fresh": fresh, "last_date_in_data": last,
+            "expected_date": expect, "message": msg}
+
+
+# ── Detect whether a column holds prices or returns ───────────────────────────
+
+def _is_price_series(series: pd.Series) -> bool:
+    """
+    Heuristic: a price series has abs(median) > 2 and std/mean < 0.5.
+    A return series has abs(median) < 0.1 and many values near zero.
+    """
+    clean = series.dropna()
+    if len(clean) == 0:
+        return False
+    med = abs(clean.median())
+    # Strong price signal: median > 2 (e.g. TLT ~ 90, TBT ~ 20)
+    if med > 2:
+        return True
+    # Strong return signal: most values between -0.2 and 0.2
+    if (clean.abs() < 0.2).mean() > 0.9:
+        return False
+    return med > 0.5

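To make the thresholds above concrete, here is a self-contained run of the same heuristic on synthetic data (the two series are invented for illustration — a TLT-like price path and its daily returns):

```python
import numpy as np
import pandas as pd

def _is_price_series(series: pd.Series) -> bool:
    # Copied from the loader above: classify a column as prices vs returns.
    clean = series.dropna()
    if len(clean) == 0:
        return False
    med = abs(clean.median())
    if med > 2:                              # price-like levels, e.g. TLT ~ 90
        return True
    if (clean.abs() < 0.2).mean() > 0.9:     # mostly small values → returns
        return False
    return med > 0.5

rng = np.random.default_rng(0)
prices  = pd.Series(90 * np.exp(np.cumsum(rng.normal(0, 0.01, 500))))
returns = prices.pct_change().dropna()

print(_is_price_series(prices))   # True  → loader will call pct_change()
print(_is_price_series(returns))  # False → loader uses the column as-is
```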
 # ── Feature / target extraction ───────────────────────────────────────────────

 def get_features_and_targets(df: pd.DataFrame):
     """
+    Build return columns for target ETFs and benchmarks.
+    Auto-detects whether source columns are prices or already returns.

     Returns:
+        input_features : list[str]
+        target_etfs    : list[str]  e.g. ["TLT_Ret", ...]
+        tbill_rate     : float
+        df             : DataFrame with _Ret columns added
+        col_info       : dict of diagnostics for sidebar display
     """
     missing = [c for c in TARGET_ETF_COLS if c not in df.columns]
     if missing:
         raise ValueError(
             f"Found in dataset: {list(df.columns)}"
         )

+    col_info = {}

+    # ── Build _Ret columns ────────────────────────────────────────────────────
+    def make_ret(col):
+        ret_col = f"{col}_Ret"
+        if ret_col in df.columns:
+            col_info[col] = "pre-computed _Ret"
+            return ret_col
+        if _is_price_series(df[col]):
+            df[ret_col] = df[col].pct_change()
+            col_info[col] = f"price→pct_change (median={df[col].median():.2f})"
+        else:
+            df[ret_col] = df[col]
+            col_info[col] = f"used as-is (median={df[col].median():.4f})"
+        return ret_col
+
+    target_etfs    = [make_ret(c) for c in TARGET_ETF_COLS]
+    benchmark_rets = [make_ret(c) for c in BENCHMARK_COLS if c in df.columns]
+
+    # Drop NaN rows (first row from pct_change)
+    df = df.dropna(subset=target_etfs).copy()
+
+    # Sanity check: target returns should be small daily values
+    for ret_col in target_etfs:
+        med = df[ret_col].abs().median()
+        if med > 0.1:
+            st.warning(
+                f"⚠️ {ret_col} has median absolute value {med:.4f} — "
+                f"these may not be daily returns. Check dataset column '{ret_col.replace('_Ret','')}'. "
+                f"Sample values: {df[ret_col].tail(3).values}"
+            )

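A toy illustration of the conversion `make_ret` performs when a column is classified as prices (the close values below are invented):

```python
import pandas as pd

# Invented TLT closes; make_ret sees median ≈ 100 (> 2), classifies the
# column as prices, and adds TLT_Ret = pct_change().
df = pd.DataFrame({"TLT": [100.0, 101.0, 99.99, 100.49]})
df["TLT_Ret"] = df["TLT"].pct_change()
print(df)
#       TLT   TLT_Ret
# 0  100.00       NaN   ← first row dropped later by dropna(subset=target_etfs)
# 1  101.00  0.010000
# 2   99.99 -0.010000
# 3  100.49  0.005001
```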
     # ── Input features ────────────────────────────────────────────────────────
     exclude = set(
+        TARGET_ETF_COLS + BENCHMARK_COLS + target_etfs + benchmark_rets +
+        [f"{c}_Ret" for c in BENCHMARK_COLS] + [TBILL_COL]
     )

+    # First try known macro columns
+    input_features = [c for c in MACRO_COLS if c in df.columns and c not in exclude]
+
+    # Then add any engineered signal columns
+    extra = [
         c for c in df.columns
         if c not in exclude
+        and c not in input_features
+        and any(k in c for k in ["_Z", "_Vol", "Regime", "YC_", "Credit_",
+                                 "Rates_", "VIX_", "Spread", "DXY", "T10Y",
+                                 "TBILL", "SOFR", "MOVE"])
+        and pd.api.types.is_numeric_dtype(df[c])
     ]
+    input_features += extra

+    # Fallback: all numeric non-excluded columns
     if not input_features:
         input_features = [
             c for c in df.columns
+            if c not in exclude and pd.api.types.is_numeric_dtype(df[c])
         ]

     # ── T-bill rate ───────────────────────────────────────────────────────────
+    tbill_rate = 0.045
     if TBILL_COL in df.columns:
         raw = df[TBILL_COL].dropna()
         if len(raw) > 0:
+            v = float(raw.iloc[-1])
+            tbill_rate = v / 100 if v > 1 else v

+    return input_features, target_etfs, tbill_rate, df, col_info


 # ── Dataset summary ───────────────────────────────────────────────────────────

         "columns": len(df.columns),
         "start_date": df.index[0].strftime("%Y-%m-%d"),
         "end_date": df.index[-1].strftime("%Y-%m-%d"),
+        "etfs_found": [c for c in TARGET_ETF_COLS if c in df.columns],
+        "benchmarks": [c for c in BENCHMARK_COLS if c in df.columns],
+        "macro_found": [c for c in MACRO_COLS if c in df.columns],
         "tbill_found": TBILL_COL in df.columns,
+        "all_cols": list(df.columns),
     }
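One detail worth calling out in the loader above: the T-bill normalisation `v / 100 if v > 1 else v` treats any value above 1 as a percent quote. A tiny standalone check of that rule:

```python
def normalise_tbill(v: float) -> float:
    # Same rule as the loader: values quoted in percent (e.g. 5.25) are
    # scaled to decimals; values already in decimals pass through.
    return v / 100 if v > 1 else v

print(normalise_tbill(5.25))    # 0.0525  (percent quote, as in FRED-style series)
print(normalise_tbill(0.0525))  # 0.0525  (already a decimal rate)
```

Note a limitation of this heuristic: a rate quoted as 0.5 (meaning 0.5%) is below the threshold and would be left as 0.5, i.e. read as 50% — a corner case in near-zero-rate regimes.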
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py CHANGED
@@ -10,9 +10,11 @@ import pandas as pd
 import numpy as np

 # ── Module imports ────────────────────────────────────────────────────────────
-from data.loader import load_dataset, check_data_freshness, get_features_and_targets, dataset_summary
 from utils.calendar import get_est_time, is_sync_window, get_next_signal_date
-from models.base import build_sequences, train_val_test_split, scale_features, returns_to_labels
 from models.approach1_wavelet import train_approach1, predict_approach1
 from models.approach2_regime import train_approach2, predict_approach2
 from models.approach3_multiscale import train_approach3, predict_approach3
@@ -47,10 +49,10 @@ with st.sidebar:

     st.divider()

-    start_yr = st.slider("📅 Start Year", 2010, 2024, 2016)
-    fee_bps = st.slider("💰 Fee (bps)", 0, 50, 10)
-    lookback = st.slider("📐 Lookback (days)", 20, 60, 30, step=5)
-    epochs = st.number_input("🔁 Max Epochs", 20, 300, 100, step=10)

     st.divider()

@@ -58,8 +60,10 @@ with st.sidebar:
     split_map = {"70/15/15": (0.70, 0.15), "80/10/10": (0.80, 0.10)}
     train_pct, val_pct = split_map[split_option]

-    include_cash = st.checkbox("💵 Include CASH class", value=True,
-                               help="Model can select CASH (earns T-bill rate) as an alternative to any ETF")

     st.divider()

@@ -70,90 +74,102 @@ st.title("🧠 P2-ETF-CNN-LSTM")
 st.caption("Approach 1: Wavelet · Approach 2: Regime-Conditioned · Approach 3: Multi-Scale Parallel")
 st.caption("Winner selected by highest raw annualised return on out-of-sample test set.")

-# ── Load data (always, to check freshness) ────────────────────────────────────
 if not HF_TOKEN:
-    st.error("❌ HF_TOKEN secret not found. Please add it to your HF Space / GitHub secrets.")
     st.stop()

 with st.spinner("📡 Loading dataset from HuggingFace..."):
-    df = load_dataset(HF_TOKEN)

-if df.empty:
     st.stop()

 # ── Freshness check ───────────────────────────────────────────────────────────
-freshness = check_data_freshness(df)
 show_freshness_status(freshness)

 # ── Dataset summary in sidebar ────────────────────────────────────────────────
 with st.sidebar:
     st.divider()
     st.subheader("📦 Dataset Info")
-    summary = dataset_summary(df)
     if summary:
         st.write(f"**Rows:** {summary['rows']:,}")
         st.write(f"**Range:** {summary['start_date']} → {summary['end_date']}")
-        st.write(f"**ETFs:** {', '.join([e.replace('_Ret','') for e in summary['etfs_found']])}")
-        st.write(f"**Benchmarks:** {', '.join([b.replace('_Ret','') for b in summary['benchmarks']])}")
         st.write(f"**T-bill col:** {'✅' if summary['tbill_found'] else '❌'}")

-# ── Main execution ────────────────────────────────────────────────────────────
 if not run_button:
-    st.info("👈 Configure parameters in the sidebar and click **🚀 Run All 3 Approaches** to begin.")
     st.stop()

 # ── Filter by start year ──────────────────────────────────────────────────────
-df = df[df.index.year >= start_yr].copy()
-st.write(f"📅 **Data:** {df.index[0].strftime('%Y-%m-%d')} → {df.index[-1].strftime('%Y-%m-%d')} "
-         f"({df.index[-1].year - df.index[0].year + 1} years)")

-# ── Feature / target extraction ───────────────────────────────────────────────
 try:
-    input_features, target_etfs, tbill_rate = get_features_and_targets(df)
 except ValueError as e:
     st.error(str(e))
     st.stop()

-st.info(f"🎯 **Targets:** {len(target_etfs)} ETFs · **Features:** {len(input_features)} signals · "
-        f"**T-bill rate:** {tbill_rate*100:.2f}%")

-# ── Prepare sequences ─────────────────────────────────────────────────────────
-X_raw = df[input_features].values.astype(np.float32)
-y_raw = df[target_etfs].values.astype(np.float32)
-n_etfs = len(target_etfs)
-n_classes = n_etfs + (1 if include_cash else 0)  # +1 for CASH

-# Fill NaNs with column means
 col_means = np.nanmean(X_raw, axis=0)
 for j in range(X_raw.shape[1]):
     mask = np.isnan(X_raw[:, j])
-    X_raw[mask, j] = col_means[j]

 X_seq, y_seq = build_sequences(X_raw, y_raw, lookback)
 y_labels = returns_to_labels(y_seq, include_cash=include_cash)

-X_train, y_train_r, X_val, y_val_r, X_test, y_test_r = train_val_test_split(X_seq, y_seq, train_pct, val_pct)
-_, y_train_l, _, y_val_l, _, y_test_l = train_val_test_split(X_seq, y_labels, train_pct, val_pct)

 X_train_s, X_val_s, X_test_s, _ = scale_features(X_train, X_val, X_test)

 train_size = len(X_train)
 val_size = len(X_val)

-# Test dates (aligned with y_test)
-test_start = lookback + train_size + val_size
-test_dates = df.index[test_start: test_start + len(X_test)]
-test_slice = slice(test_start, test_start + len(X_test))

-st.success(f"✅ Sequences — Train: {train_size} · Val: {val_size} · Test: {len(X_test)}")

 # ── Train all three approaches ────────────────────────────────────────────────
 results = {}
-trained_info = {}  # store extra info needed for conviction

 progress = st.progress(0, text="Starting training...")

-# ── Approach 1: Wavelet ───────────────────────────────────────────────────────
 with st.spinner("🌊 Training Approach 1 — Wavelet CNN-LSTM..."):
     try:
         model1, hist1, _ = train_approach1(
@@ -163,7 +179,8 @@ with st.spinner("🌊 Training Approach 1 — Wavelet CNN-LSTM..."):
         )
         preds1, proba1 = predict_approach1(model1, X_test_s)
         results["Approach 1"] = execute_strategy(
-            preds1, proba1, y_test_r, test_dates, target_etfs, fee_bps, tbill_rate, include_cash,
         )
         trained_info["Approach 1"] = {"proba": proba1}
         st.success("✅ Approach 1 complete")
@@ -173,7 +190,7 @@ with st.spinner("🌊 Training Approach 1 — Wavelet CNN-LSTM..."):

 progress.progress(33, text="Approach 1 done...")

-# ── Approach 2: Regime-Conditioned ───────────────────────────────────────────
 with st.spinner("🔀 Training Approach 2 — Regime-Conditioned CNN-LSTM..."):
     try:
         model2, hist2, hmm2, regime_cols2 = train_approach2(
@@ -191,7 +208,8 @@ with st.spinner("🔀 Training Approach 2 — Regime-Conditioned CNN-LSTM..."):
             lookback, train_size, val_size,
         )
         results["Approach 2"] = execute_strategy(
-            preds2, proba2, y_test_r, test_dates, target_etfs, fee_bps, tbill_rate, include_cash,
         )
         trained_info["Approach 2"] = {"proba": proba2}
         st.success("✅ Approach 2 complete")
@@ -201,7 +219,7 @@ with st.spinner("🔀 Training Approach 2 — Regime-Conditioned CNN-LSTM..."):

 progress.progress(66, text="Approach 2 done...")

-# ── Approach 3: Multi-Scale ───────────────────────────────────────────────────
 with st.spinner("📡 Training Approach 3 — Multi-Scale CNN-LSTM..."):
     try:
         model3, hist3 = train_approach3(
@@ -211,7 +229,8 @@ with st.spinner("📡 Training Approach 3 — Multi-Scale CNN-LSTM..."):
         )
         preds3, proba3 = predict_approach3(model3, X_test_s)
         results["Approach 3"] = execute_strategy(
-            preds3, proba3, y_test_r, test_dates, target_etfs, fee_bps, tbill_rate, include_cash,
         )
         trained_info["Approach 3"] = {"proba": proba3}
         st.success("✅ Approach 3 complete")
@@ -227,15 +246,14 @@ winner_name = select_winner(results)
 winner_res = results.get(winner_name)

 if winner_res is None:
-    st.error("❌ All approaches failed. Please check your data and configuration.")
     st.stop()

-# ── Next trading date ─────────────────────────────────────────────────────────
 next_date = get_next_signal_date()

 st.divider()

-# ── Signal banner (winner) ────────────────────────────────────────────────────
 show_signal_banner(winner_res["next_signal"], next_date, winner_name)

 # ── Conviction panel ──────────────────────────────────────────────────────────
@@ -256,7 +274,6 @@ st.subheader("🏆 Approach Comparison (Winner = Highest Raw Annualised Return)"
 comparison_df = build_comparison_table(results, winner_name)
 show_comparison_table(comparison_df)

-# ── Comparison bar chart ──────────────────────────────────────────────────────
 st.plotly_chart(comparison_bar_chart(results, winner_name), use_container_width=True)

 st.divider()
@@ -268,6 +285,6 @@ st.plotly_chart(fig, use_container_width=True)

 st.divider()

-# ── Audit trail (winner) ──────────────────────────────────────────────────────
 st.subheader(f"📋 Audit Trail — {winner_name} (Last 20 Trading Days)")
 show_audit_trail(winner_res["audit_trail"])
 import numpy as np

 # ── Module imports ────────────────────────────────────────────────────────────
+from data.loader import (load_dataset, check_data_freshness,
+                         get_features_and_targets, dataset_summary)
 from utils.calendar import get_est_time, is_sync_window, get_next_signal_date
+from models.base import (build_sequences, train_val_test_split,
+                         scale_features, returns_to_labels)
 from models.approach1_wavelet import train_approach1, predict_approach1
 from models.approach2_regime import train_approach2, predict_approach2
 from models.approach3_multiscale import train_approach3, predict_approach3

     st.divider()

+    start_yr = st.slider("📅 Start Year", 2010, 2024, 2016)
+    fee_bps = st.slider("💰 Fee (bps)", 0, 50, 10)
+    lookback = st.slider("📐 Lookback (days)", 20, 60, 30, step=5)
+    epochs = st.number_input("🔁 Max Epochs", 20, 300, 100, step=10)

     st.divider()

     split_map = {"70/15/15": (0.70, 0.15), "80/10/10": (0.80, 0.10)}
     train_pct, val_pct = split_map[split_option]

+    include_cash = st.checkbox(
+        "💵 Include CASH class", value=True,
+        help="Model can select CASH (earns T-bill rate) instead of any ETF",
+    )

     st.divider()

 st.caption("Approach 1: Wavelet · Approach 2: Regime-Conditioned · Approach 3: Multi-Scale Parallel")
 st.caption("Winner selected by highest raw annualised return on out-of-sample test set.")

+# ── Token check ───────────────────────────────────────────────────────────────
 if not HF_TOKEN:
+    st.error("❌ HF_TOKEN secret not found. Add it to HF Space / GitHub secrets.")
     st.stop()

+# ── Load dataset ──────────────────────────────────────────────────────────────
 with st.spinner("📡 Loading dataset from HuggingFace..."):
+    df_raw = load_dataset(HF_TOKEN)

+if df_raw.empty:
     st.stop()

 # ── Freshness check ───────────────────────────────────────────────────────────
+freshness = check_data_freshness(df_raw)
 show_freshness_status(freshness)

 # ── Dataset summary in sidebar ────────────────────────────────────────────────
 with st.sidebar:
     st.divider()
     st.subheader("📦 Dataset Info")
+    summary = dataset_summary(df_raw)
     if summary:
         st.write(f"**Rows:** {summary['rows']:,}")
         st.write(f"**Range:** {summary['start_date']} → {summary['end_date']}")
+        st.write(f"**ETFs:** {', '.join(summary['etfs_found'])}")
+        st.write(f"**Benchmarks:** {', '.join(summary['benchmarks'])}")
+        st.write(f"**Macro:** {', '.join(summary['macro_found'])}")
         st.write(f"**T-bill col:** {'✅' if summary['tbill_found'] else '❌'}")

+# ── Wait for run button ───────────────────────────────────────────────────────
 if not run_button:
+    st.info("👈 Configure parameters in the sidebar and click **🚀 Run All 3 Approaches**.")
     st.stop()

 # ── Filter by start year ──────────────────────────────────────────────────────
+df = df_raw[df_raw.index.year >= start_yr].copy()
+st.write(
+    f"📅 **Data:** {df.index[0].strftime('%Y-%m-%d')} → {df.index[-1].strftime('%Y-%m-%d')} "
+    f"({df.index[-1].year - df.index[0].year + 1} years)"
+)

+# ── Features & targets ────────────────────────────────────────────────────────
 try:
+    input_features, target_etfs, tbill_rate, df = get_features_and_targets(df)
 except ValueError as e:
     st.error(str(e))
     st.stop()

+n_etfs = len(target_etfs)
+n_classes = n_etfs + (1 if include_cash else 0)
+
+st.info(
+    f"🎯 **Targets:** {', '.join([t.replace('_Ret','') for t in target_etfs])} · "
+    f"**Features:** {len(input_features)} signals · "
+    f"**T-bill:** {tbill_rate*100:.2f}%"
+)

+# ── Build sequences ───────────────────────────────────────────────────────────
+X_raw = df[input_features].values.astype(np.float32)
+y_raw = df[target_etfs].values.astype(np.float32)

+# Fill any remaining NaNs with column means
 col_means = np.nanmean(X_raw, axis=0)
 for j in range(X_raw.shape[1]):
     mask = np.isnan(X_raw[:, j])
+    if mask.any():
+        X_raw[mask, j] = col_means[j]

 X_seq, y_seq = build_sequences(X_raw, y_raw, lookback)
 y_labels = returns_to_labels(y_seq, include_cash=include_cash)

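`build_sequences` is imported from `models/base.py`, which is not part of this sync. The sketch below is a plausible reading of its windowing, inferred from the date arithmetic used later (`test_start = lookback + train_size + val_size`) — an assumption, not the confirmed implementation:

```python
import numpy as np

def build_sequences(X: np.ndarray, y: np.ndarray, lookback: int):
    # Assumed behaviour: pair each lookback-day feature window with the
    # return vector of the day that follows the window.
    X_seq = np.stack([X[t - lookback:t] for t in range(lookback, len(X))])
    y_seq = y[lookback:]
    return X_seq, y_seq

X = np.random.rand(100, 6).astype(np.float32)   # 6 macro features
y = np.random.rand(100, 5).astype(np.float32)   # 5 ETF returns
X_seq, y_seq = build_sequences(X, y, 30)
print(X_seq.shape, y_seq.shape)                 # (70, 30, 6) (70, 5)
```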
+(X_train, y_train_r, X_val, y_val_r,
+ X_test, y_test_r) = train_val_test_split(X_seq, y_seq, train_pct, val_pct)
+(_, y_train_l, _, y_val_l,
+ _, y_test_l) = train_val_test_split(X_seq, y_labels, train_pct, val_pct)

 X_train_s, X_val_s, X_test_s, _ = scale_features(X_train, X_val, X_test)

 train_size = len(X_train)
 val_size = len(X_val)

+test_start = lookback + train_size + val_size
+test_dates = df.index[test_start: test_start + len(X_test)]
+test_slice = slice(test_start, test_start + len(X_test))

+st.success(
+    f"✅ Sequences — Train: {train_size:,} · Val: {val_size:,} · Test: {len(X_test):,}"
+)

 # ── Train all three approaches ────────────────────────────────────────────────
 results = {}
+trained_info = {}

 progress = st.progress(0, text="Starting training...")

+# ── Approach 1 ────────────────────────────────────────────────────────────────
 with st.spinner("🌊 Training Approach 1 — Wavelet CNN-LSTM..."):
     try:
         model1, hist1, _ = train_approach1(

         )
         preds1, proba1 = predict_approach1(model1, X_test_s)
         results["Approach 1"] = execute_strategy(
+            preds1, proba1, y_test_r, test_dates,
+            target_etfs, fee_bps, tbill_rate, include_cash,
         )
         trained_info["Approach 1"] = {"proba": proba1}
         st.success("✅ Approach 1 complete")

 progress.progress(33, text="Approach 1 done...")

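The strategy module behind `execute_strategy` is not shown in this diff; a plausible reading of its inputs (`fee_bps`, `tbill_rate`, `include_cash`) is sketched below. Names and logic are illustrative only — in particular the bps-on-switch fee model and the daily T-bill accrual are assumptions:

```python
import numpy as np

def execute_strategy(preds, y_test_r, fee_bps, tbill_rate, include_cash=True):
    # Assumed semantics: each day hold the predicted class (an ETF column of
    # y_test_r, or CASH earning the daily T-bill rate) and charge fee_bps
    # whenever the position changes.
    n_etfs = y_test_r.shape[1]
    daily_tbill = tbill_rate / 252
    fee = fee_bps / 10_000
    rets = []
    for t, p in enumerate(preds):
        r = daily_tbill if (include_cash and p == n_etfs) else y_test_r[t, p]
        if t > 0 and preds[t] != preds[t - 1]:
            r -= fee                      # switching cost on rotation days
        rets.append(r)
    return np.array(rets)

rets = execute_strategy(np.array([0, 0, 5]), np.random.randn(3, 5) * 0.01,
                        fee_bps=10, tbill_rate=0.045)  # day 3 → CASH, minus fee
```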
+# ── Approach 2 ────────────────────────────────────────────────────────────────
 with st.spinner("🔀 Training Approach 2 — Regime-Conditioned CNN-LSTM..."):
     try:
         model2, hist2, hmm2, regime_cols2 = train_approach2(

             lookback, train_size, val_size,
         )
         results["Approach 2"] = execute_strategy(
+            preds2, proba2, y_test_r, test_dates,
+            target_etfs, fee_bps, tbill_rate, include_cash,
         )
         trained_info["Approach 2"] = {"proba": proba2}
         st.success("✅ Approach 2 complete")

 progress.progress(66, text="Approach 2 done...")

+# ── Approach 3 ────────────────────────────────────────────────────────────────
 with st.spinner("📡 Training Approach 3 — Multi-Scale CNN-LSTM..."):
     try:
         model3, hist3 = train_approach3(

         )
         preds3, proba3 = predict_approach3(model3, X_test_s)
         results["Approach 3"] = execute_strategy(
+            preds3, proba3, y_test_r, test_dates,
+            target_etfs, fee_bps, tbill_rate, include_cash,
         )
         trained_info["Approach 3"] = {"proba": proba3}
         st.success("✅ Approach 3 complete")

 winner_res = results.get(winner_name)

 if winner_res is None:
+    st.error("❌ All approaches failed. Please check data and configuration.")
     st.stop()

 next_date = get_next_signal_date()

 st.divider()

+# ── Signal banner ─────────────────────────────────────────────────────────────
 show_signal_banner(winner_res["next_signal"], next_date, winner_name)

 # ── Conviction panel ──────────────────────────────────────────────────────────

 comparison_df = build_comparison_table(results, winner_name)
 show_comparison_table(comparison_df)

 st.plotly_chart(comparison_bar_chart(results, winner_name), use_container_width=True)

 st.divider()

 st.divider()

+# ── Audit trail ───────────────────────────────────────────────────────────────
 st.subheader(f"📋 Audit Trail — {winner_name} (Last 20 Trading Days)")
 show_audit_trail(winner_res["audit_trail"])
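For completeness, `select_winner` is also not in this diff; given the app's caption ("Winner selected by highest raw annualised return on out-of-sample test set"), its rule is presumably a one-liner along these lines — the `"annualised_return"` key is a guessed name:

```python
def select_winner(results: dict) -> str:
    # Assumed rule, per the app caption: pick the approach whose result dict
    # reports the highest raw annualised return on the test set.
    return max(results, key=lambda k: results[k]["annualised_return"])
```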
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/data/loader.py CHANGED
@@ -3,6 +3,10 @@ data/loader.py
 Loads master_data.parquet from HF Dataset.
 Validates freshness against the last NYSE trading day.
 No external pings — all data comes from HF Dataset only.
 """

 import pandas as pd
@@ -22,31 +26,29 @@ except ImportError:
 DATASET_REPO = "P2SAMAPA/fi-etf-macro-signal-master-data"
 PARQUET_FILE = "master_data.parquet"

-# Columns expected in the dataset
-REQUIRED_ETF_COLS = ["TLT_Ret", "TBT_Ret", "VNQ_Ret", "SLV_Ret", "GLD_Ret"]
-BENCHMARK_COLS = ["SPY_Ret", "AGG_Ret"]
-TBILL_COL = "DTB3"  # 3m T-bill column in HF dataset
-TARGET_ETFS = REQUIRED_ETF_COLS  # 5 targets (no CASH in returns, CASH handled in strategy)

 # ── NYSE calendar helpers ─────────────────────────────────────────────────────

-def get_last_nyse_trading_day(as_of: datetime = None) -> datetime.date:
-    """Return the most recent NYSE trading day before or on as_of (default: today EST)."""
     est = pytz.timezone("US/Eastern")
     if as_of is None:
         as_of = datetime.now(est)
-
     today = as_of.date()

     if NYSE_CAL_AVAILABLE:
         try:
-            nyse = mcal.get_calendar("NYSE")
-            # Look back up to 10 days to find last trading day
             start = today - timedelta(days=10)
-            schedule = nyse.schedule(start_date=start, end_date=today)
-            if len(schedule) > 0:
-                return schedule.index[-1].date()
         except Exception:
             pass

@@ -57,18 +59,6 @@ def get_last_nyse_trading_day(as_of: datetime = None) -> datetime.date:
     return candidate

-def is_nyse_trading_day(date) -> bool:
-    """Return True if date is a NYSE trading day."""
-    if NYSE_CAL_AVAILABLE:
-        try:
-            nyse = mcal.get_calendar("NYSE")
-            schedule = nyse.schedule(start_date=date, end_date=date)
-            return len(schedule) > 0
-        except Exception:
-            pass
-    return date.weekday() < 5
-
-
 # ── Data loading ──────────────────────────────────────────────────────────────

 @st.cache_data(ttl=3600, show_spinner=False)
@@ -88,10 +78,10 @@ def load_dataset(hf_token: str) -> pd.DataFrame:

     # Ensure DatetimeIndex
     if not isinstance(df.index, pd.DatetimeIndex):
-        if "Date" in df.columns:
-            df = df.set_index("Date")
-        elif "date" in df.columns:
-            df = df.set_index("date")
         df.index = pd.to_datetime(df.index)

     df = df.sort_index()
@@ -107,14 +97,6 @@ def load_dataset(hf_token: str) -> pd.DataFrame:
 def check_data_freshness(df: pd.DataFrame) -> dict:
     """
     Check whether the dataset contains data for the last NYSE trading day.
-
-    Returns a dict:
-        {
-            "fresh": bool,
-            "last_date_in_data": date,
-            "expected_date": date,
-            "message": str
-        }
     """
     if df.empty:
         return {
@@ -126,8 +108,7 @@ def check_data_freshness(df: pd.DataFrame) -> dict:

     last_date_in_data = df.index[-1].date()
     expected_date = get_last_nyse_trading_day()
-
-    fresh = last_date_in_data >= expected_date

     if fresh:
         message = f"✅ Dataset is up to date through **{last_date_in_data}**."
@@ -150,66 +131,105 @@ def check_data_freshness(df: pd.DataFrame) -> dict:

 def get_features_and_targets(df: pd.DataFrame):
     """
-    Extract input feature columns and target ETF return columns from the dataset.

     Returns:
-        input_features : list of column names
-        target_etfs    : list of ETF return column names (e.g. TLT_Ret)
-        tbill_rate     : latest 3m T-bill rate as a float (annualised, e.g. 0.045)
     """
-    # Target ETF return columns
-    target_etfs = [c for c in REQUIRED_ETF_COLS if c in df.columns]

-    if not target_etfs:
         raise ValueError(
-            f"No target ETF columns found. Expected: {REQUIRED_ETF_COLS}. "
             f"Found in dataset: {list(df.columns)}"
         )

-    # Input features: Z-scores, vol, regime, yield curve, credit, rates, VIX terms
-    exclude = set(target_etfs + BENCHMARK_COLS + [TBILL_COL])
     input_features = [
         c for c in df.columns
         if c not in exclude
-        and (
-            c.endswith("_Z")
-            or c.endswith("_Vol")
-            or "Regime" in c
-            or "YC_" in c
-            or "Credit_" in c
-            or "Rates_" in c
-            or "VIX_" in c
-            or "Spread" in c
-            or "DXY" in c
-            or "VIX" in c
-            or "T10Y" in c
-        )
     ]

-    # 3m T-bill rate (for CASH return & Sharpe)
-    tbill_rate = 0.045  # default fallback
     if TBILL_COL in df.columns:
         raw = df[TBILL_COL].dropna()
         if len(raw) > 0:
-            last_val = raw.iloc[-1]
-            # DTB3 is typically in percent (e.g. 5.25 means 5.25%)
-            tbill_rate = float(last_val) / 100 if last_val > 1 else float(last_val)

-    return input_features, target_etfs, tbill_rate


-# ── Column info helper (for sidebar display) ──────────────────────────────────

 def dataset_summary(df: pd.DataFrame) -> dict:
-    """Return a brief summary dict for sidebar display."""
     if df.empty:
         return {}
     return {
-        "rows": len(df),
-        "columns": len(df.columns),
-        "start_date": df.index[0].strftime("%Y-%m-%d"),
-        "end_date": df.index[-1].strftime("%Y-%m-%d"),
-        "etfs_found": [c for c in REQUIRED_ETF_COLS if c in df.columns],
-        "benchmarks": [c for c in BENCHMARK_COLS if c in df.columns],
         "tbill_found": TBILL_COL in df.columns,
     }
 Loads master_data.parquet from HF Dataset.
 Validates freshness against the last NYSE trading day.
 No external pings — all data comes from HF Dataset only.
+
+Actual dataset columns (from parquet inspection):
+  ETFs  : AGG, GLD, SLV, SPY, TBT, TLT, VNQ
+  Macro : VIX, DXY, T10Y2Y, TBILL_3M, IG_SPREAD, HY_SPREAD
 """

 import pandas as pd

 DATASET_REPO = "P2SAMAPA/fi-etf-macro-signal-master-data"
 PARQUET_FILE = "master_data.parquet"

+# ── Actual column names in the dataset ───────────────────────────────────────
+TARGET_ETF_COLS = ["TLT", "TBT", "VNQ", "SLV", "GLD"]   # traded ETFs
+BENCHMARK_COLS  = ["SPY", "AGG"]                        # chart only
+TBILL_COL       = "TBILL_3M"                            # 3m T-bill rate
+MACRO_COLS      = ["VIX", "DXY", "T10Y2Y", "IG_SPREAD", "HY_SPREAD"]


 # ── NYSE calendar helpers ─────────────────────────────────────────────────────

+def get_last_nyse_trading_day(as_of=None):
+    """Return the most recent NYSE trading day on or before as_of (default: today EST)."""
     est = pytz.timezone("US/Eastern")
     if as_of is None:
         as_of = datetime.now(est)
     today = as_of.date()

     if NYSE_CAL_AVAILABLE:
         try:
+            nyse = mcal.get_calendar("NYSE")
             start = today - timedelta(days=10)
+            sched = nyse.schedule(start_date=start, end_date=today)
+            if len(sched) > 0:
+                return sched.index[-1].date()
         except Exception:
             pass

     return candidate


 # ── Data loading ──────────────────────────────────────────────────────────────

 @st.cache_data(ttl=3600, show_spinner=False)

     # Ensure DatetimeIndex
     if not isinstance(df.index, pd.DatetimeIndex):
+        for col in ["Date", "date", "DATE"]:
+            if col in df.columns:
+                df = df.set_index(col)
+                break
         df.index = pd.to_datetime(df.index)

     df = df.sort_index()

 def check_data_freshness(df: pd.DataFrame) -> dict:
     """
     Check whether the dataset contains data for the last NYSE trading day.
     """
     if df.empty:
         return {

     last_date_in_data = df.index[-1].date()
     expected_date = get_last_nyse_trading_day()
+    fresh = last_date_in_data >= expected_date

     if fresh:
         message = f"✅ Dataset is up to date through **{last_date_in_data}**."

 def get_features_and_targets(df: pd.DataFrame):
     """
+    Extract input feature columns and target ETF return columns.
+
+    The dataset stores raw price or return values directly under ticker names.
+    We compute daily log returns for target ETFs if they are not already returns.

     Returns:
+        input_features : list of column names to use as model inputs
+        target_etfs    : list of ETF column names (after return computation)
+        tbill_rate     : latest 3m T-bill rate as float (annualised, e.g. 0.045)
+        df             : DataFrame (possibly with new _Ret columns added)
     """

+    # ── Confirm target ETFs exist ─────────────────────────────────────────────
+    missing = [c for c in TARGET_ETF_COLS if c not in df.columns]
+    if missing:
         raise ValueError(
+            f"Missing ETF columns: {missing}. "
             f"Found in dataset: {list(df.columns)}"
         )

+    # ── Build return columns ──────────────────────────────────────────────────
+    # If values look like prices (>5), compute pct returns.
+    # If they already look like small returns (<1 in abs), use as-is.
+    target_etfs = []
+    for col in TARGET_ETF_COLS:
+        ret_col = f"{col}_Ret"
+        if ret_col not in df.columns:
+            sample = df[col].dropna()
+            if len(sample) > 0 and abs(sample.median()) > 1:
+                # Looks like price — compute pct change
+                df[ret_col] = df[col].pct_change()
+            else:
+                # Already returns
+                df[ret_col] = df[col]
+        target_etfs.append(ret_col)
+
+    # Same for benchmarks
+    for col in BENCHMARK_COLS:
+        ret_col = f"{col}_Ret"
+        if ret_col not in df.columns and col in df.columns:
+            sample = df[col].dropna()
+            if len(sample) > 0 and abs(sample.median()) > 1:
+                df[ret_col] = df[col].pct_change()
+            else:
+                df[ret_col] = df[col]
+
+    # Drop rows with NaN in target columns (first row after pct_change)
+    df = df.dropna(subset=target_etfs)
+
+    # ── Input features ────────────────────────────────────────────────────────
+    # Use macro columns directly; exclude ETF price/return cols and benchmarks
+    exclude = set(
+        TARGET_ETF_COLS + BENCHMARK_COLS +
+        target_etfs +
+        [f"{c}_Ret" for c in BENCHMARK_COLS] +
+        [TBILL_COL]
+    )
+
     input_features = [
         c for c in df.columns
         if c not in exclude
+        and c in (MACRO_COLS + [
+            col for col in df.columns
+            if any(k in col for k in ["_Z", "_Vol", "Regime", "YC_", "Credit_",
+                                      "Rates_", "VIX_", "Spread", "DXY", "T10Y"])
+        ])
     ]

+    # Fallback: if none matched, use all non-excluded numeric columns
+    if not input_features:
+        input_features = [
+            c for c in df.columns
+            if c not in exclude
+            and pd.api.types.is_numeric_dtype(df[c])
+        ]
+
+    # ── T-bill rate ───────────────────────────────────────────────────────────
+    tbill_rate = 0.045  # default
     if TBILL_COL in df.columns:
         raw = df[TBILL_COL].dropna()
         if len(raw) > 0:
+            last_val = float(raw.iloc[-1])
+            tbill_rate = last_val / 100 if last_val > 1 else last_val

+    return input_features, target_etfs, tbill_rate, df


+# ── Dataset summary ───────────────────────────────────────────────────────────

 def dataset_summary(df: pd.DataFrame) -> dict:
     if df.empty:
         return {}
     return {
+        "rows": len(df),
+        "columns": len(df.columns),
+        "start_date": df.index[0].strftime("%Y-%m-%d"),
+        "end_date": df.index[-1].strftime("%Y-%m-%d"),
+        "etfs_found": [c for c in TARGET_ETF_COLS if c in df.columns],
+        "benchmarks": [c for c in BENCHMARK_COLS if c in df.columns],
+        "macro_found": [c for c in MACRO_COLS if c in df.columns],
         "tbill_found": TBILL_COL in df.columns,
     }
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/data/data/__init__.py ADDED
@@ -0,0 +1 @@
+
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md CHANGED
@@ -1,3 +1,15 @@
+---
+title: P2-ETF-CNN-LSTM-ALTERNATIVE-APPROACHES
+emoji: 🧠
+colorFrom: green
+colorTo: blue
+sdk: streamlit
+sdk_version: "1.32.0"
+python_version: "3.10"
+app_file: app.py
+pinned: false
+---
+
 # P2-ETF-CNN-LSTM-ALTERNATIVE-APPROACHES

 Macro-driven ETF rotation using three augmented CNN-LSTM variants.
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/data/__init__.py ADDED
@@ -0,0 +1 @@
+
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/data/loader.py ADDED
@@ -0,0 +1,215 @@
+"""
+data/loader.py
+Loads master_data.parquet from HF Dataset.
+Validates freshness against the last NYSE trading day.
+No external pings — all data comes from HF Dataset only.
+"""
+
+import pandas as pd
+import numpy as np
+import streamlit as st
+from huggingface_hub import hf_hub_download
+from datetime import datetime, timedelta
+import pytz
+import os
+
+try:
+    import pandas_market_calendars as mcal
+    NYSE_CAL_AVAILABLE = True
+except ImportError:
+    NYSE_CAL_AVAILABLE = False
+
+DATASET_REPO = "P2SAMAPA/fi-etf-macro-signal-master-data"
+PARQUET_FILE = "master_data.parquet"
+
+# Columns expected in the dataset
+REQUIRED_ETF_COLS = ["TLT_Ret", "TBT_Ret", "VNQ_Ret", "SLV_Ret", "GLD_Ret"]
+BENCHMARK_COLS = ["SPY_Ret", "AGG_Ret"]
+TBILL_COL = "DTB3"  # 3m T-bill column in HF dataset
+TARGET_ETFS = REQUIRED_ETF_COLS  # 5 targets (no CASH in returns, CASH handled in strategy)
+
+
+# ── NYSE calendar helpers ─────────────────────────────────────────────────────
+
+def get_last_nyse_trading_day(as_of: datetime = None) -> datetime.date:
+    """Return the most recent NYSE trading day before or on as_of (default: today EST)."""
+    est = pytz.timezone("US/Eastern")
+    if as_of is None:
+        as_of = datetime.now(est)
+
+    today = as_of.date()
+
+    if NYSE_CAL_AVAILABLE:
+        try:
+            nyse = mcal.get_calendar("NYSE")
+            # Look back up to 10 days to find last trading day
+            start = today - timedelta(days=10)
+            schedule = nyse.schedule(start_date=start, end_date=today)
+            if len(schedule) > 0:
+                return schedule.index[-1].date()
+        except Exception:
+            pass
+
+    # Fallback: skip weekends
+    candidate = today
+    while candidate.weekday() >= 5:
+        candidate -= timedelta(days=1)
+    return candidate
+
+
+def is_nyse_trading_day(date) -> bool:
+    """Return True if date is a NYSE trading day."""
+    if NYSE_CAL_AVAILABLE:
+        try:
+            nyse = mcal.get_calendar("NYSE")
+            schedule = nyse.schedule(start_date=date, end_date=date)
+            return len(schedule) > 0
+        except Exception:
+            pass
+    return date.weekday() < 5
+
+
+# ── Data loading ──────────────────────────────────────────────────────────────
+
+@st.cache_data(ttl=3600, show_spinner=False)
+def load_dataset(hf_token: str) -> pd.DataFrame:
+    """
+    Download master_data.parquet from HF Dataset and return as DataFrame.
+    Cached for 1 hour. Index is parsed as DatetimeIndex.
+    """
+    try:
+        path = hf_hub_download(
+            repo_id=DATASET_REPO,
+            filename=PARQUET_FILE,
+            repo_type="dataset",
+            token=hf_token,
+        )
+        df = pd.read_parquet(path)
+
+        # Ensure DatetimeIndex
+        if not isinstance(df.index, pd.DatetimeIndex):
+            if "Date" in df.columns:
+                df = df.set_index("Date")
+            elif "date" in df.columns:
+                df = df.set_index("date")
+            df.index = pd.to_datetime(df.index)
+
+        df = df.sort_index()
+        return df
+
+    except Exception as e:
+        st.error(f"❌ Failed to load dataset from HuggingFace: {e}")
+        return pd.DataFrame()
+
+
+# ── Freshness check ───────────────────────────────────────────────────────────
+
+def check_data_freshness(df: pd.DataFrame) -> dict:
+    """
+    Check whether the dataset contains data for the last NYSE trading day.
+
+    Returns a dict:
+        {
+            "fresh": bool,
+            "last_date_in_data": date,
+            "expected_date": date,
+            "message": str
+        }
+    """
+    if df.empty:
+        return {
+            "fresh": False,
+            "last_date_in_data": None,
+            "expected_date": None,
+            "message": "Dataset is empty.",
+        }
+
+    last_date_in_data = df.index[-1].date()
+    expected_date = get_last_nyse_trading_day()
+
+    fresh = last_date_in_data >= expected_date
+
+    if fresh:
+        message = f"✅ Dataset is up to date through **{last_date_in_data}**."
+    else:
+        message = (
+            f"⚠️ **{expected_date}** data not yet updated in dataset. "
+            f"Latest available: **{last_date_in_data}**. "
+            f"Please check back later — the dataset updates daily after market close."
+        )
+
+    return {
+        "fresh": fresh,
+        "last_date_in_data": last_date_in_data,
+        "expected_date": expected_date,
+        "message": message,
+    }
+
+
+# ── Feature / target extraction ───────────────────────────────────────────────
+
+def get_features_and_targets(df: pd.DataFrame):
+    """
+    Extract input feature columns and target ETF return columns from the dataset.
+
+    Returns:
+        input_features : list of column names
+        target_etfs    : list of ETF return column names (e.g. TLT_Ret)
+        tbill_rate     : latest 3m T-bill rate as a float (annualised, e.g. 0.045)
+    """
+    # Target ETF return columns
+    target_etfs = [c for c in REQUIRED_ETF_COLS if c in df.columns]
+
+    if not target_etfs:
+        raise ValueError(
+            f"No target ETF columns found. Expected: {REQUIRED_ETF_COLS}. "
+            f"Found in dataset: {list(df.columns)}"
+        )
+
+    # Input features: Z-scores, vol, regime, yield curve, credit, rates, VIX terms
+    exclude = set(target_etfs + BENCHMARK_COLS + [TBILL_COL])
+    input_features = [
+        c for c in df.columns
+        if c not in exclude
+        and (
+            c.endswith("_Z")
+            or c.endswith("_Vol")
+            or "Regime" in c
+            or "YC_" in c
+            or "Credit_" in c
+            or "Rates_" in c
+            or "VIX_" in c
+            or "Spread" in c
+            or "DXY" in c
+            or "VIX" in c
+            or "T10Y" in c
+        )
+    ]
+
+    # 3m T-bill rate (for CASH return & Sharpe)
+    tbill_rate = 0.045  # default fallback
+    if TBILL_COL in df.columns:
+        raw = df[TBILL_COL].dropna()
+        if len(raw) > 0:
+            last_val = raw.iloc[-1]
+            # DTB3 is typically in percent (e.g. 5.25 means 5.25%)
+            tbill_rate = float(last_val) / 100 if last_val > 1 else float(last_val)
+
+    return input_features, target_etfs, tbill_rate
+
+
+# ── Column info helper (for sidebar display) ──────────────────────────────────
+
+def dataset_summary(df: pd.DataFrame) -> dict:
+    """Return a brief summary dict for sidebar display."""
+    if df.empty:
+        return {}
+    return {
+        "rows": len(df),
+        "columns": len(df.columns),
+        "start_date": df.index[0].strftime("%Y-%m-%d"),
+        "end_date": df.index[-1].strftime("%Y-%m-%d"),
+        "etfs_found": [c for c in REQUIRED_ETF_COLS if c in df.columns],
+        "benchmarks": [c for c in BENCHMARK_COLS if c in df.columns],
+        "tbill_found": TBILL_COL in df.columns,
+    }
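Taken together, the loader exposes a four-call surface. A hedged usage sketch, assuming the script runs from the Space root with `HF_TOKEN` set in the environment and read access to the dataset repo (calling the `@st.cache_data`-wrapped function outside a running Streamlit app still works, but logs a cache warning):

```python
# Hypothetical driver for data/loader.py; HF_TOKEN and repo access are assumed.
import os
from data.loader import (
    load_dataset, check_data_freshness, get_features_and_targets, dataset_summary,
)

df = load_dataset(os.environ["HF_TOKEN"])   # cached parquet download
status = check_data_freshness(df)           # compares df.index[-1] to last NYSE day
print(status["message"])

features, targets, tbill = get_features_and_targets(df)
print(f"{len(features)} features, {len(targets)} targets, T-bill {tbill:.3%}")
print(dataset_summary(df))                  # the dict the sidebar renders
```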
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md CHANGED
@@ -1,19 +1,115 @@
----
-title: P2 ETF CNN LSTM ALTERNATIVE APPROACHES
-emoji: 🚀
-colorFrom: red
-colorTo: red
-sdk: docker
-app_port: 8501
-tags:
-  - streamlit
-pinned: false
-short_description: Streamlit template space
----
-
-# Welcome to Streamlit!
-
-Edit `/src/streamlit_app.py` to customize this app to your heart's desire. :heart:
-
-If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
-forums](https://discuss.streamlit.io).
+# P2-ETF-CNN-LSTM-ALTERNATIVE-APPROACHES
+
+Macro-driven ETF rotation using three augmented CNN-LSTM variants.
+Winner selected by **highest raw annualised return** on the out-of-sample test set.
+
+---
+
+## Architecture Overview
+
+| Approach | Core Idea | Key Addition |
+|---|---|---|
+| **1 — Wavelet** | DWT decomposes each macro signal into frequency subbands before the CNN | Separates trend / cycle / noise |
+| **2 — Regime-Conditioned** | HMM detects macro regimes; one-hot regime label concatenated into the network | Removes non-stationarity |
+| **3 — Multi-Scale Parallel** | Three CNN towers (kernels 3, 7, 21 days) run in parallel before the LSTM | Captures momentum + cycle + trend simultaneously |
+
+---
+
+## ETF Universe
+
+| Ticker | Description |
+|---|---|
+| TLT | 20+ Year Treasury Bond |
+| TBT | 20+ Year Treasury Short (2×) |
+| VNQ | Real Estate (REIT) |
+| SLV | Silver |
+| GLD | Gold |
+| CASH | 3m T-bill rate (from HF dataset) |
+
+Benchmarks (chart only, not traded): **SPY**, **AGG**
+
+---
+
+## Data
+
+All data sourced exclusively from:
+**`P2SAMAPA/fi-etf-macro-signal-master-data`** (HuggingFace Dataset)
+File: `master_data.parquet`
+
+No external API calls (no yfinance, no FRED).
+The app checks daily whether the prior NYSE trading day's data is present in the dataset.
+
+---
+
+## Project Structure
+
+```
+├── .github/
+│   └── workflows/
+│       └── sync.yml              # Auto-sync GitHub → HF Space on push to main
+│
+├── app.py                        # Streamlit orchestrator (UI wiring only)
+│
+├── data/
+│   └── loader.py                 # HF dataset load, freshness check, column validation
+│
+├── models/
+│   ├── base.py                   # Shared: sequences, splits, scaling, callbacks
+│   ├── approach1_wavelet.py      # Wavelet CNN-LSTM
+│   ├── approach2_regime.py       # Regime-Conditioned CNN-LSTM
+│   └── approach3_multiscale.py   # Multi-Scale Parallel CNN-LSTM
+│
+├── strategy/
+│   └── backtest.py               # execute_strategy, metrics, winner selection
+│
+├── signals/
+│   └── conviction.py             # Z-score conviction scoring
+│
+├── ui/
+│   ├── components.py             # Banner, conviction panel, metrics, audit trail
+│   └── charts.py                 # Plotly equity curve + comparison bar chart
+│
+├── utils/
+│   └── calendar.py               # NYSE calendar, next trading day, EST time
+│
+├── requirements.txt
+└── README.md
+```
+
+---
+
+## Secrets Required
+
+| Secret | Where | Purpose |
+|---|---|---|
+| `HF_TOKEN` | GitHub + HF Space | Read HF dataset · Sync HF Space |
+
+Set in:
+- GitHub: `Settings → Secrets → Actions → New repository secret`
+- HF Space: `Settings → Repository secrets`
+
+---
+
+## Deployment
+
+Push to `main` → GitHub Actions (`sync.yml`) automatically syncs to HF Space.
+
+### Local development
+
+```bash
+pip install -r requirements.txt
+export HF_TOKEN=your_token
+streamlit run app.py
+```
+
+---
+
+## Output UI
+
+1. **Data freshness warning** alerts if prior NYSE trading day data is missing
+2. **Next Trading Day Signal** — date + ETF from the winning approach
+3. **Signal Conviction** — Z-score gauge + per-ETF probability bars
+4. **Performance Metrics** — Annualised Return, Sharpe, Hit Ratio, Max DD
+5. **Approach Comparison Table** — all three approaches side by side
+6. **Equity Curves** — all three approaches + SPY + AGG benchmarks
+7. **Audit Trail** — last 20 trading days for the winning approach
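The README's winner rule ("highest raw annualised return") reduces to compounding daily net returns and annualising over 252 trading days. A toy check with invented return series:

```python
# Toy illustration of the "highest raw annualised return" winner rule.
# The three daily-return series are invented for the example.
import numpy as np

def annualised_return(daily_rets: np.ndarray) -> float:
    cum = np.prod(1 + daily_rets)              # total growth factor
    return cum ** (252 / len(daily_rets)) - 1  # geometric annualisation

results = {
    "Approach 1": np.full(126, 0.0004),   # ~10.6% annualised
    "Approach 2": np.full(126, 0.0006),   # ~16.3% annualised
    "Approach 3": np.full(126, 0.0002),   # ~5.2% annualised
}
winner = max(results, key=lambda k: annualised_return(results[k]))
print(winner)  # Approach 2
```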
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py ADDED
@@ -0,0 +1,273 @@
+"""
+app.py
+P2-ETF-CNN-LSTM-ALTERNATIVE-APPROACHES
+Streamlit orchestrator — UI wiring only, no business logic here.
+"""
+
+import os
+import streamlit as st
+import pandas as pd
+import numpy as np
+
+# ── Module imports ────────────────────────────────────────────────────────────
+from data.loader import load_dataset, check_data_freshness, get_features_and_targets, dataset_summary
+from utils.calendar import get_est_time, is_sync_window, get_next_signal_date
+from models.base import build_sequences, train_val_test_split, scale_features, returns_to_labels
+from models.approach1_wavelet import train_approach1, predict_approach1
+from models.approach2_regime import train_approach2, predict_approach2
+from models.approach3_multiscale import train_approach3, predict_approach3
+from strategy.backtest import execute_strategy, select_winner, build_comparison_table
+from signals.conviction import compute_conviction
+from ui.components import (
+    show_freshness_status, show_signal_banner, show_conviction_panel,
+    show_metrics_row, show_comparison_table, show_audit_trail,
+)
+from ui.charts import equity_curve_chart, comparison_bar_chart
+
+# ── Page config ───────────────────────────────────────────────────────────────
+st.set_page_config(
+    page_title="P2-ETF-CNN-LSTM",
+    page_icon="🧠",
+    layout="wide",
+)
+
+# ── Secrets ───────────────────────────────────────────────────────────────────
+HF_TOKEN = os.getenv("HF_TOKEN", "")
+
+# ── Sidebar ───────────────────────────────────────────────────────────────────
+with st.sidebar:
+    st.header("⚙️ Configuration")
+
+    now_est = get_est_time()
+    st.write(f"🕒 **EST:** {now_est.strftime('%H:%M:%S')}")
+    if is_sync_window():
+        st.success("✅ Sync Window Active")
+    else:
+        st.info("⏸️ Sync Window Inactive")
+
+    st.divider()
+
+    start_yr = st.slider("📅 Start Year", 2010, 2024, 2016)
+    fee_bps = st.slider("💰 Fee (bps)", 0, 50, 10)
+    lookback = st.slider("📐 Lookback (days)", 20, 60, 30, step=5)
+    epochs = st.number_input("🔁 Max Epochs", 20, 300, 100, step=10)
+
+    st.divider()
+
+    split_option = st.selectbox("📊 Train/Val/Test Split", ["70/15/15", "80/10/10"], index=0)
+    split_map = {"70/15/15": (0.70, 0.15), "80/10/10": (0.80, 0.10)}
+    train_pct, val_pct = split_map[split_option]
+
+    include_cash = st.checkbox("💵 Include CASH class", value=True,
+                               help="Model can select CASH (earns T-bill rate) as an alternative to any ETF")
+
+    st.divider()
+
+    run_button = st.button("🚀 Run All 3 Approaches", type="primary", use_container_width=True)
+
+# ── Title ─────────────────────────────────────────────────────────────────────
+st.title("🧠 P2-ETF-CNN-LSTM")
+st.caption("Approach 1: Wavelet · Approach 2: Regime-Conditioned · Approach 3: Multi-Scale Parallel")
+st.caption("Winner selected by highest raw annualised return on out-of-sample test set.")
+
+# ── Load data (always, to check freshness) ────────────────────────────────────
+if not HF_TOKEN:
+    st.error("❌ HF_TOKEN secret not found. Please add it to your HF Space / GitHub secrets.")
+    st.stop()
+
+with st.spinner("📡 Loading dataset from HuggingFace..."):
+    df = load_dataset(HF_TOKEN)
+
+if df.empty:
+    st.stop()
+
+# ── Freshness check ───────────────────────────────────────────────────────────
+freshness = check_data_freshness(df)
+show_freshness_status(freshness)
+
+# ── Dataset summary in sidebar ────────────────────────────────────────────────
+with st.sidebar:
+    st.divider()
+    st.subheader("📦 Dataset Info")
+    summary = dataset_summary(df)
+    if summary:
+        st.write(f"**Rows:** {summary['rows']:,}")
+        st.write(f"**Range:** {summary['start_date']} → {summary['end_date']}")
+        st.write(f"**ETFs:** {', '.join([e.replace('_Ret','') for e in summary['etfs_found']])}")
+        st.write(f"**Benchmarks:** {', '.join([b.replace('_Ret','') for b in summary['benchmarks']])}")
+        st.write(f"**T-bill col:** {'✅' if summary['tbill_found'] else '❌'}")
+
+# ── Main execution ────────────────────────────────────────────────────────────
+if not run_button:
+    st.info("👈 Configure parameters in the sidebar and click **🚀 Run All 3 Approaches** to begin.")
+    st.stop()
+
+# ── Filter by start year ──────────────────────────────────────────────────────
+df = df[df.index.year >= start_yr].copy()
+st.write(f"📅 **Data:** {df.index[0].strftime('%Y-%m-%d')} → {df.index[-1].strftime('%Y-%m-%d')} "
+         f"({df.index[-1].year - df.index[0].year + 1} years)")
+
+# ── Feature / target extraction ───────────────────────────────────────────────
+try:
+    input_features, target_etfs, tbill_rate = get_features_and_targets(df)
+except ValueError as e:
+    st.error(str(e))
+    st.stop()
+
+st.info(f"🎯 **Targets:** {len(target_etfs)} ETFs · **Features:** {len(input_features)} signals · "
+        f"**T-bill rate:** {tbill_rate*100:.2f}%")
+
+# ── Prepare sequences ─────────────────────────────────────────────────────────
+X_raw = df[input_features].values.astype(np.float32)
+y_raw = df[target_etfs].values.astype(np.float32)
+n_etfs = len(target_etfs)
+n_classes = n_etfs + (1 if include_cash else 0)  # +1 for CASH
+
+# Fill NaNs with column means
+col_means = np.nanmean(X_raw, axis=0)
+for j in range(X_raw.shape[1]):
+    mask = np.isnan(X_raw[:, j])
+    X_raw[mask, j] = col_means[j]
+
+X_seq, y_seq = build_sequences(X_raw, y_raw, lookback)
+y_labels = returns_to_labels(y_seq, include_cash=include_cash)
+
+X_train, y_train_r, X_val, y_val_r, X_test, y_test_r = train_val_test_split(X_seq, y_seq, train_pct, val_pct)
+_, y_train_l, _, y_val_l, _, y_test_l = train_val_test_split(X_seq, y_labels, train_pct, val_pct)
+
+X_train_s, X_val_s, X_test_s, _ = scale_features(X_train, X_val, X_test)
+
+train_size = len(X_train)
+val_size = len(X_val)
+
+# Test dates (aligned with y_test)
+test_start = lookback + train_size + val_size
+test_dates = df.index[test_start: test_start + len(X_test)]
+test_slice = slice(test_start, test_start + len(X_test))
+
+st.success(f"✅ Sequences — Train: {train_size} · Val: {val_size} · Test: {len(X_test)}")
+
+# ── Train all three approaches ────────────────────────────────────────────────
+results = {}
+trained_info = {}  # store extra info needed for conviction
+
+progress = st.progress(0, text="Starting training...")
+
+# ── Approach 1: Wavelet ───────────────────────────────────────────────────────
+with st.spinner("🌊 Training Approach 1 — Wavelet CNN-LSTM..."):
+    try:
+        model1, hist1, _ = train_approach1(
+            X_train_s, y_train_l,
+            X_val_s, y_val_l,
+            n_classes=n_classes, epochs=int(epochs),
+        )
+        preds1, proba1 = predict_approach1(model1, X_test_s)
+        results["Approach 1"] = execute_strategy(
+            preds1, proba1, y_test_r, test_dates, target_etfs, fee_bps, tbill_rate, include_cash,
+        )
+        trained_info["Approach 1"] = {"proba": proba1}
+        st.success("✅ Approach 1 complete")
+    except Exception as e:
+        st.warning(f"⚠️ Approach 1 failed: {e}")
+        results["Approach 1"] = None
+
+progress.progress(33, text="Approach 1 done...")
+
+# ── Approach 2: Regime-Conditioned ───────────────────────────────────────────
+with st.spinner("🔀 Training Approach 2 — Regime-Conditioned CNN-LSTM..."):
+    try:
+        model2, hist2, hmm2, regime_cols2 = train_approach2(
+            X_train_s, y_train_l,
+            X_val_s, y_val_l,
+            X_flat_all=X_raw,
+            feature_names=input_features,
+            lookback=lookback,
+            train_size=train_size,
+            val_size=val_size,
+            n_classes=n_classes, epochs=int(epochs),
+        )
+        preds2, proba2 = predict_approach2(
+            model2, X_test_s, X_raw, regime_cols2, hmm2,
+            lookback, train_size, val_size,
+        )
+        results["Approach 2"] = execute_strategy(
+            preds2, proba2, y_test_r, test_dates, target_etfs, fee_bps, tbill_rate, include_cash,
+        )
+        trained_info["Approach 2"] = {"proba": proba2}
+        st.success("✅ Approach 2 complete")
+    except Exception as e:
+        st.warning(f"⚠️ Approach 2 failed: {e}")
+        results["Approach 2"] = None
+
+progress.progress(66, text="Approach 2 done...")
+
+# ── Approach 3: Multi-Scale ───────────────────────────────────────────────────
+with st.spinner("📡 Training Approach 3 — Multi-Scale CNN-LSTM..."):
+    try:
+        model3, hist3 = train_approach3(
+            X_train_s, y_train_l,
+            X_val_s, y_val_l,
+            n_classes=n_classes, epochs=int(epochs),
+        )
+        preds3, proba3 = predict_approach3(model3, X_test_s)
+        results["Approach 3"] = execute_strategy(
+            preds3, proba3, y_test_r, test_dates, target_etfs, fee_bps, tbill_rate, include_cash,
+        )
+        trained_info["Approach 3"] = {"proba": proba3}
+        st.success("✅ Approach 3 complete")
+    except Exception as e:
+        st.warning(f"⚠️ Approach 3 failed: {e}")
+        results["Approach 3"] = None
+
+progress.progress(100, text="All approaches complete!")
+progress.empty()
+
+# ── Select winner ─────────────────────────────────────────────────────────────
+winner_name = select_winner(results)
+winner_res = results.get(winner_name)
+
+if winner_res is None:
+    st.error("❌ All approaches failed. Please check your data and configuration.")
+    st.stop()
+
+# ── Next trading date ─────────────────────────────────────────────────────────
+next_date = get_next_signal_date()
+
+st.divider()
+
+# ── Signal banner (winner) ────────────────────────────────────────────────────
+show_signal_banner(winner_res["next_signal"], next_date, winner_name)
+
+# ── Conviction panel ──────────────────────────────────────────────────────────
+winner_proba = trained_info[winner_name]["proba"]
+conviction = compute_conviction(winner_proba[-1], target_etfs, include_cash)
+show_conviction_panel(conviction)
+
+st.divider()
+
+# ── Winner metrics ────────────────────────────────────────────────────────────
+st.subheader(f"📊 {winner_name} — Performance Metrics")
+show_metrics_row(winner_res, tbill_rate)
+
+st.divider()
+
+# ── Comparison table ──────────────────────────────────────────────────────────
+st.subheader("🏆 Approach Comparison (Winner = Highest Raw Annualised Return)")
+comparison_df = build_comparison_table(results, winner_name)
+show_comparison_table(comparison_df)
+
+# ── Comparison bar chart ──────────────────────────────────────────────────────
+st.plotly_chart(comparison_bar_chart(results, winner_name), use_container_width=True)
+
+st.divider()
+
+# ── Equity curves ─────────────────────────────────────────────────────────────
+st.subheader("📈 Out-of-Sample Equity Curves — All Approaches vs Benchmarks")
+fig = equity_curve_chart(results, winner_name, test_dates, df, test_slice, tbill_rate)
+st.plotly_chart(fig, use_container_width=True)
+
+st.divider()
+
+# ── Audit trail (winner) ──────────────────────────────────────────────────────
+st.subheader(f"📋 Audit Trail — {winner_name} (Last 20 Trading Days)")
+show_audit_trail(winner_res["audit_trail"])
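The shape arithmetic around `build_sequences` and `train_val_test_split` is easy to get wrong, so here is a small sketch with invented dimensions showing why `test_start = lookback + train_size + val_size` lines the test window up with the original DataFrame rows:

```python
# Sketch of the sequence/split shape arithmetic used in app.py
# (dimensions invented; windowing mirrors models.base.build_sequences).
import numpy as np

n_days, n_features, n_etfs, lookback = 1000, 12, 5, 30
X_raw = np.random.randn(n_days, n_features).astype(np.float32)
y_raw = (np.random.randn(n_days, n_etfs) * 0.01).astype(np.float32)

# Sample k uses rows [k, k+lookback) as input and row k+lookback as target
X_seq = np.stack([X_raw[i - lookback:i] for i in range(lookback, n_days)])
y_seq = y_raw[lookback:]
assert X_seq.shape == (970, 30, 12) and y_seq.shape == (970, 5)

t1, t2 = int(970 * 0.70), int(970 * (0.70 + 0.15))   # 70/15/15 split points
test_start = lookback + t2                            # first test row in the frame
print(X_seq[:t1].shape, X_seq[t1:t2].shape, X_seq[t2:].shape, test_start)
# (679, 30, 12) (145, 30, 12) (146, 30, 12) 854
```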
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitattributes ADDED
@@ -0,0 +1,35 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/Dockerfile ADDED
@@ -0,0 +1,20 @@
+FROM python:3.13.5-slim
+
+WORKDIR /app
+
+RUN apt-get update && apt-get install -y \
+    build-essential \
+    curl \
+    git \
+    && rm -rf /var/lib/apt/lists/*
+
+COPY requirements.txt ./
+COPY src/ ./src/
+
+RUN pip3 install -r requirements.txt
+
+EXPOSE 8501
+
+HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
+
+ENTRYPOINT ["streamlit", "run", "src/streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0"]
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md ADDED
@@ -0,0 +1,19 @@
+---
+title: P2 ETF CNN LSTM ALTERNATIVE APPROACHES
+emoji: 🚀
+colorFrom: red
+colorTo: red
+sdk: docker
+app_port: 8501
+tags:
+  - streamlit
+pinned: false
+short_description: Streamlit template space
+---
+
+# Welcome to Streamlit!
+
+Edit `/src/streamlit_app.py` to customize this app to your heart's desire. :heart:
+
+If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
+forums](https://discuss.streamlit.io).
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/requirements.txt ADDED
@@ -0,0 +1,3 @@
+altair
+pandas
+streamlit
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/src/streamlit_app.py ADDED
@@ -0,0 +1,40 @@
+import altair as alt
+import numpy as np
+import pandas as pd
+import streamlit as st
+
+"""
+# Welcome to Streamlit!
+
+Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
+If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
+forums](https://discuss.streamlit.io).
+
+In the meantime, below is an example of what you can do with just a few lines of code:
+"""
+
+num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
+num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
+
+indices = np.linspace(0, 1, num_points)
+theta = 2 * np.pi * num_turns * indices
+radius = indices
+
+x = radius * np.cos(theta)
+y = radius * np.sin(theta)
+
+df = pd.DataFrame({
+    "x": x,
+    "y": y,
+    "idx": indices,
+    "rand": np.random.randn(num_points),
+})
+
+st.altair_chart(alt.Chart(df, height=700, width=700)
+    .mark_point(filled=True)
+    .encode(
+        x=alt.X("x", axis=None),
+        y=alt.Y("y", axis=None),
+        color=alt.Color("idx", legend=None, scale=alt.Scale()),
+        size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
+    ))
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/requirements.txt CHANGED
@@ -1,3 +1,29 @@
-altair
-pandas
-streamlit
+# Core
+streamlit>=1.32.0
+pandas>=2.0.0
+numpy>=1.24.0
+
+# Hugging Face
+huggingface_hub>=0.21.0
+datasets>=2.18.0
+
+# Machine Learning
+tensorflow>=2.14.0
+scikit-learn>=1.3.0
+xgboost>=2.0.0
+
+# Wavelet (Approach 1)
+PyWavelets>=1.5.0
+
+# Regime detection (Approach 2)
+hmmlearn>=0.3.0
+
+# Visualisation
+plotly>=5.18.0
+
+# NYSE Calendar
+pandas_market_calendars>=4.3.0
+pytz>=2024.1
+
+# Parquet
+pyarrow>=14.0.0
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/base.py ADDED
@@ -0,0 +1,199 @@
+"""
+models/base.py
+Shared utilities for all three CNN-LSTM variants:
+- Data preparation (sequences, train/val/test split)
+- Common Keras layers / callbacks
+- Predict + evaluate helpers
+"""
+
+import numpy as np
+import pandas as pd
+from sklearn.preprocessing import RobustScaler
+import tensorflow as tf
+from tensorflow import keras
+
+# ── Reproducibility ───────────────────────────────────────────────────────────
+SEED = 42
+tf.random.set_seed(SEED)
+np.random.seed(SEED)
+
+
+# ── Sequence builder ──────────────────────────────────────────────────────────
+
+def build_sequences(features: np.ndarray, targets: np.ndarray, lookback: int):
+    """
+    Build supervised sequences for CNN-LSTM input.
+
+    Args:
+        features : 2-D array [n_days, n_features]
+        targets  : 2-D array [n_days, n_etfs] (raw returns)
+        lookback : number of past days per sample
+
+    Returns:
+        X : [n_samples, lookback, n_features]
+        y : [n_samples, n_etfs] (raw returns for the next day)
+    """
+    X, y = [], []
+    for i in range(lookback, len(features)):
+        X.append(features[i - lookback: i])
+        y.append(targets[i])
+    return np.array(X, dtype=np.float32), np.array(y, dtype=np.float32)
+
+
+# ── Train / val / test split ──────────────────────────────────────────────────
+
+def train_val_test_split(X, y, train_pct=0.70, val_pct=0.15):
+    """Split sequences into train / val / test preserving temporal order."""
+    n = len(X)
+    t1 = int(n * train_pct)
+    t2 = int(n * (train_pct + val_pct))
+
+    return (
+        X[:t1], y[:t1],
+        X[t1:t2], y[t1:t2],
+        X[t2:], y[t2:],
+    )
+
+
+# ── Feature scaling ───────────────────────────────────────────────────────────
+
+def scale_features(X_train, X_val, X_test):
+    """
+    Fit RobustScaler on training data only, apply to val and test.
+    Operates on the flattened feature dimension.
+
+    Returns scaled arrays with same shape as inputs.
+    """
+    n_train, lb, n_feat = X_train.shape
+    scaler = RobustScaler()
+
+    # Fit on train
+    scaler.fit(X_train.reshape(-1, n_feat))
+
+    def _transform(X):
+        shape = X.shape
+        return scaler.transform(X.reshape(-1, n_feat)).reshape(shape)
+
+    return _transform(X_train), _transform(X_val), _transform(X_test), scaler
+
+
+# ── Label builder (classification: argmax of returns) ────────────────────────
+
+def returns_to_labels(y_raw, include_cash=True, cash_threshold=0.0):
+    """
+    Convert raw return matrix to integer class labels.
+
+    If include_cash=True, adds a CASH class (index = n_etfs) when
+    the best ETF return is below cash_threshold.
+
+    Args:
+        y_raw          : [n_samples, n_etfs]
+        include_cash   : whether to allow CASH class
+        cash_threshold : minimum ETF return to prefer over CASH
+
+    Returns:
+        labels : [n_samples] integer class indices
+    """
+    best = np.argmax(y_raw, axis=1)
+    if include_cash:
+        best_return = y_raw[np.arange(len(y_raw)), best]
+        cash_idx = y_raw.shape[1]
+        labels = np.where(best_return < cash_threshold, cash_idx, best)
+    else:
+        labels = best
+    return labels.astype(np.int32)
+
+
+# ── Common Keras callbacks ────────────────────────────────────────────────────
+
+def get_callbacks(patience_es=15, patience_lr=8, min_lr=1e-6):
+    """Standard early stopping + reduce-LR callbacks shared by all models."""
+    return [
+        keras.callbacks.EarlyStopping(
+            monitor="val_loss",
+            patience=patience_es,
+            restore_best_weights=True,
+            verbose=0,
+        ),
+        keras.callbacks.ReduceLROnPlateau(
+            monitor="val_loss",
+            factor=0.5,
+            patience=patience_lr,
+            min_lr=min_lr,
+            verbose=0,
+        ),
+    ]
+
+
+# ── Common output head ────────────────────────────────────────────────────────
+
+def classification_head(x, n_classes: int, dropout: float = 0.3):
+    """
+    Shared dense output head for all three CNN-LSTM variants.
+
+    Args:
+        x         : input tensor
+        n_classes : number of ETF classes (+ 1 for CASH if applicable)
+        dropout   : dropout rate
+
+    Returns:
+        output tensor with softmax activation
+    """
+    x = keras.layers.Dense(64, activation="relu")(x)
+    x = keras.layers.Dropout(dropout)(x)
+    x = keras.layers.Dense(n_classes, activation="softmax")(x)
+    return x
+
+
+# ── Prediction helper ─────────────────────────────────────────────────────────
+
+def predict_classes(model, X_test: np.ndarray) -> tuple:
+    """Return (integer class predictions, softmax probabilities) from a Keras model."""
+    proba = model.predict(X_test, verbose=0)
+    return np.argmax(proba, axis=1), proba
+
+
+# ── Metrics helper ────────────────────────────────────────────────────────────
+
+def evaluate_returns(
+    preds: np.ndarray,
+    proba: np.ndarray,
+    y_raw_test: np.ndarray,
+    target_etfs: list,
+    tbill_rate: float,
+    fee_bps: int,
+    include_cash: bool = True,
+):
+    """
+    Given integer class predictions and raw return matrix,
+    compute strategy returns and summary metrics.
+
+    Returns:
+        strat_rets  : np.ndarray of daily net returns
+        ann_return  : annualised return (float)
+        cum_returns : cumulative return series
+        last_proba  : probability vector for the last prediction
+        next_etf    : name of ETF predicted for next session
+    """
+    n_etfs = len(target_etfs)
+    strat_rets = []
+
+    for i, cls in enumerate(preds):
+        if include_cash and cls == n_etfs:
+            # CASH: earn daily T-bill rate
+            daily_tbill = tbill_rate / 252
+            net = daily_tbill - (fee_bps / 10000)
+        else:
+            ret = y_raw_test[i][cls]
+            net = ret - (fee_bps / 10000)
+        strat_rets.append(net)
+
+    strat_rets = np.array(strat_rets)
+    cum_returns = np.cumprod(1 + strat_rets)
+    ann_return = (cum_returns[-1] ** (252 / len(strat_rets))) - 1
+
+    last_proba = proba[-1]
+    next_cls = int(np.argmax(last_proba))
+    next_etf = "CASH" if (include_cash and next_cls == n_etfs) else target_etfs[next_cls].replace("_Ret", "")
+
+    return strat_rets, ann_return, cum_returns, last_proba, next_etf
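A quick check of `returns_to_labels` behaviour with invented returns: the CASH class (index `n_etfs`) is assigned whenever even the best ETF return for a day falls below `cash_threshold`:

```python
# Tiny check of returns_to_labels with two invented days of ETF returns.
import numpy as np
from models.base import returns_to_labels

y = np.array([
    [ 0.010, -0.002,  0.004],   # best ETF is idx 0 and positive -> label 0
    [-0.003, -0.001, -0.006],   # best ETF is idx 1 but negative -> CASH (3)
])
print(returns_to_labels(y, include_cash=True))    # [0 3]
print(returns_to_labels(y, include_cash=False))   # [0 1]
```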
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach1_wavelet.py ADDED
@@ -0,0 +1,167 @@
+"""
+models/approach1_wavelet.py
+Approach 1: Wavelet Decomposition CNN-LSTM
+
+Pipeline:
+    Raw macro signals
+    → DWT (db4, level=3) per signal → multi-band channel stack
+    → 1D CNN (64 filters, k=3) → MaxPool → (32 filters, k=3)
+    → LSTM (128 units)
+    → Dense 64 → Softmax (n_etfs + 1 CASH)
+"""
+
+import numpy as np
+import pywt
+import tensorflow as tf
+from tensorflow import keras
+from models.base import classification_head, get_callbacks
+
+WAVELET = "db4"
+LEVEL = 3
+
+
+# ── Wavelet feature engineering ───────────────────────────────────────────────
+
+def _wavelet_decompose_signal(signal: np.ndarray, wavelet: str, level: int) -> np.ndarray:
+    """
+    Decompose a 1-D signal into DWT subbands and return them stacked.
+
+    For a signal of length T:
+        coeffs = [cA_n, cD_n, cD_{n-1}, ..., cD_1]
+    We interpolate each subband back to length T so we can stack them.
+
+    Returns: array of shape [T, level+1]
+    """
+    T = len(signal)
+    coeffs = pywt.wavedec(signal, wavelet, level=level)
+    bands = []
+    for c in coeffs:
+        # Interpolate back to original length
+        band = np.interp(
+            np.linspace(0, len(c) - 1, T),
+            np.arange(len(c)),
+            c,
+        )
+        bands.append(band)
+    return np.stack(bands, axis=-1)  # [T, level+1]
+
+
+def apply_wavelet_transform(X: np.ndarray, wavelet: str = WAVELET, level: int = LEVEL) -> np.ndarray:
+    """
+    Apply DWT to every feature channel across all samples.
+
+    Args:
+        X : [n_samples, lookback, n_features]
+
+    Returns:
+        X_wt : [n_samples, lookback, n_features * (level+1)]
+    """
+    n_samples, lookback, n_features = X.shape
+    n_bands = level + 1
+    X_wt = np.zeros((n_samples, lookback, n_features * n_bands), dtype=np.float32)
+
+    for s in range(n_samples):
+        for f in range(n_features):
+            decomposed = _wavelet_decompose_signal(X[s, :, f], wavelet, level)  # [T, n_bands]
+            start = f * n_bands
+            X_wt[s, :, start: start + n_bands] = decomposed
+
+    return X_wt
+
+
+# ── Model builder ─────────────────────────────────────────────────────────────
+
+def build_wavelet_cnn_lstm(
+    input_shape: tuple,
+    n_classes: int,
+    dropout: float = 0.3,
+    lstm_units: int = 128,
+) -> keras.Model:
+    """
+    Build Wavelet CNN-LSTM model.
+
+    Args:
+        input_shape : (lookback, n_features * n_bands) — post-DWT shape
+        n_classes   : number of output classes (ETFs + CASH)
+        dropout     : dropout rate
+        lstm_units  : LSTM hidden size
+
+    Returns:
+        Compiled Keras model
+    """
+    inputs = keras.Input(shape=input_shape, name="wavelet_input")
+
+    # CNN block 1
+    x = keras.layers.Conv1D(64, kernel_size=3, padding="causal", activation="relu")(inputs)
+    x = keras.layers.BatchNormalization()(x)
+    x = keras.layers.MaxPooling1D(pool_size=2)(x)
+
+    # CNN block 2
+    x = keras.layers.Conv1D(32, kernel_size=3, padding="causal", activation="relu")(x)
+    x = keras.layers.BatchNormalization()(x)
+    x = keras.layers.Dropout(dropout)(x)
+
+    # LSTM
+    x = keras.layers.LSTM(lstm_units, dropout=dropout, recurrent_dropout=0.1)(x)
+
+    # Output head
+    outputs = classification_head(x, n_classes, dropout)
+
+    model = keras.Model(inputs, outputs, name="Approach1_Wavelet_CNN_LSTM")
+    model.compile(
+        optimizer=keras.optimizers.Adam(learning_rate=1e-3),
+        loss="sparse_categorical_crossentropy",
+        metrics=["accuracy"],
+    )
+    return model
+
+
+# ── Full train pipeline ───────────────────────────────────────────────────────
+
+def train_approach1(
+    X_train, y_train,
+    X_val, y_val,
+    n_classes: int,
+    epochs: int = 100,
+    batch_size: int = 32,
+    dropout: float = 0.3,
+    lstm_units: int = 128,
+):
+    """
+    Apply wavelet transform then train the CNN-LSTM.
+
+    Args:
+        X_train/val : [n, lookback, n_features] (scaled, pre-wavelet)
+        y_train/val : [n] integer class labels
+        n_classes   : total output classes
+
+    Returns:
+        model    : trained Keras model
+        history  : training history
+        wt_shape : post-DWT input shape (for inference)
+    """
+    # Apply DWT
+    X_train_wt = apply_wavelet_transform(X_train)
+    X_val_wt = apply_wavelet_transform(X_val)
+
+    input_shape = X_train_wt.shape[1:]  # (lookback, n_features * n_bands)
+    model = build_wavelet_cnn_lstm(input_shape, n_classes, dropout, lstm_units)
+
+    history = model.fit(
+        X_train_wt, y_train,
+        validation_data=(X_val_wt, y_val),
+        epochs=epochs,
+        batch_size=batch_size,
+        callbacks=get_callbacks(),
+        verbose=0,
+    )
+
+    return model, history, input_shape
+
+
+def predict_approach1(model, X_test: np.ndarray) -> tuple:
+    """Apply DWT to test set then predict. Returns (class_preds, proba)."""
+    X_test_wt = apply_wavelet_transform(X_test)
+    proba = model.predict(X_test_wt, verbose=0)
+    preds = np.argmax(proba, axis=1)
+    return preds, proba
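The interpolation trick in `_wavelet_decompose_signal` is what lets subbands of different lengths stack into one `[T, level+1]` block. A shape check with an invented 60-day signal (level 3 is valid for db4 at this length; at the app's shorter lookbacks PyWavelets may emit a boundary-level warning but still decompose):

```python
# Shape check for the wavelet stack: db4 at level 3 yields 4 subbands
# per feature, each interpolated back to the lookback length T.
import numpy as np
import pywt

T = 60  # hypothetical lookback
signal = np.sin(np.linspace(0, 6 * np.pi, T)) + 0.1 * np.random.randn(T)

coeffs = pywt.wavedec(signal, "db4", level=3)   # [cA3, cD3, cD2, cD1]
print([len(c) for c in coeffs])                 # subband lengths shrink per level

bands = [np.interp(np.linspace(0, len(c) - 1, T), np.arange(len(c)), c)
         for c in coeffs]
stacked = np.stack(bands, axis=-1)
print(stacked.shape)                            # (60, 4): one [T, level+1] block per feature
```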
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach2_regime.py ADDED
@@ -0,0 +1 @@
+
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach3_multiscale.py ADDED
@@ -0,0 +1,150 @@
+"""
+models/approach3_multiscale.py
+Approach 3: Multi-Scale Parallel CNN-LSTM
+
+Pipeline:
+    Raw macro signals
+    → 3 parallel CNN towers: kernel 3 (short), 7 (medium), 21 (long)
+    → Concatenate [96 features]
+    → LSTM (128 units)
+    → Dense 64 → Softmax (n_etfs + 1 CASH)
+"""
+
+import numpy as np
+import tensorflow as tf
+from tensorflow import keras
+from models.base import classification_head, get_callbacks
+
+# Kernel sizes represent: momentum (3d), weekly cycle (7d), monthly trend (21d)
+KERNEL_SIZES = [3, 7, 21]
+FILTERS_EACH = 32  # 32 × 3 towers = 96 concatenated features
+
+
+# ── Model builder ─────────────────────────────────────────────────────────────
+
+def build_multiscale_cnn_lstm(
+    input_shape: tuple,
+    n_classes: int,
+    kernel_sizes: list = None,
+    filters: int = FILTERS_EACH,
+    dropout: float = 0.3,
+    lstm_units: int = 128,
+) -> keras.Model:
+    """
+    Multi-scale parallel CNN-LSTM.
+
+    Three CNN towers with different kernel sizes run in parallel on the
+    same input, capturing momentum, weekly cycle, and monthly trend
+    simultaneously. Their outputs are concatenated before the LSTM.
+
+    Args:
+        input_shape  : (lookback, n_features)
+        n_classes    : number of output classes (ETFs + CASH)
+        kernel_sizes : list of kernel sizes for each tower
+        filters      : number of Conv1D filters per tower
+        dropout      : dropout rate
+        lstm_units   : LSTM hidden size
+
+    Returns:
+        Compiled Keras model
+    """
+    if kernel_sizes is None:
+        kernel_sizes = KERNEL_SIZES
+
+    inputs = keras.Input(shape=input_shape, name="multiscale_input")
+
+    towers = []
+    for k in kernel_sizes:
+        # Each tower: Conv → BN → Conv → BN → Dropout
+        t = keras.layers.Conv1D(
+            filters, kernel_size=k, padding="causal", activation="relu",
+            name=f"conv1_k{k}"
+        )(inputs)
+        t = keras.layers.BatchNormalization(name=f"bn1_k{k}")(t)
+        t = keras.layers.Conv1D(
+            filters, kernel_size=k, padding="causal", activation="relu",
+            name=f"conv2_k{k}"
+        )(t)
+        t = keras.layers.BatchNormalization(name=f"bn2_k{k}")(t)
+        t = keras.layers.Dropout(dropout, name=f"drop_k{k}")(t)
+        towers.append(t)
+
+    # Concatenate along the feature dimension — keeps temporal axis intact for LSTM
+    if len(towers) > 1:
+        merged = keras.layers.Concatenate(axis=-1, name="tower_concat")(towers)
+    else:
+        merged = towers[0]
+
+    # LSTM integrates multi-scale temporal features
+    x = keras.layers.LSTM(lstm_units, dropout=dropout, recurrent_dropout=0.1, name="lstm")(merged)
+
+    # Output head
+    outputs = classification_head(x, n_classes, dropout)
+
+    model = keras.Model(inputs, outputs, name="Approach3_MultiScale_CNN_LSTM")
+    model.compile(
+        optimizer=keras.optimizers.Adam(learning_rate=1e-3),
+        loss="sparse_categorical_crossentropy",
+        metrics=["accuracy"],
+    )
+    return model
+
+
+# ── Full train pipeline ───────────────────────────────────────────────────────
+
+def train_approach3(
+    X_train, y_train,
+    X_val, y_val,
+    n_classes: int,
+    epochs: int = 100,
+    batch_size: int = 32,
+    dropout: float = 0.3,
+    lstm_units: int = 128,
+    kernel_sizes: list = None,
+):
+    """
+    Build and train the multi-scale CNN-LSTM.
+
+    Args:
+        X_train/val : [n, lookback, n_features]
+        y_train/val : [n] integer class labels
+        n_classes   : total output classes
+
+    Returns:
+        model   : trained Keras model
+        history : training history
+    """
+    if kernel_sizes is None:
+        kernel_sizes = KERNEL_SIZES
+
+    # Guard: lookback must be >= largest kernel
+    lookback = X_train.shape[1]
+    valid_kernels = [k for k in kernel_sizes if k <= lookback]
+    if not valid_kernels:
+        valid_kernels = [min(3, lookback)]
+
+    model = build_multiscale_cnn_lstm(
+        input_shape=X_train.shape[1:],
+        n_classes=n_classes,
+        kernel_sizes=valid_kernels,
+        dropout=dropout,
+        lstm_units=lstm_units,
+    )
+
+    history = model.fit(
+        X_train, y_train,
+        validation_data=(X_val, y_val),
+        epochs=epochs,
+        batch_size=batch_size,
+        callbacks=get_callbacks(),
+        verbose=0,
+    )
+
+    return model, history
+
+
+def predict_approach3(model, X_test: np.ndarray) -> tuple:
+    """Predict on test set. Returns (class_preds, proba)."""
+    proba = model.predict(X_test, verbose=0)
+    preds = np.argmax(proba, axis=1)
+    return preds, proba
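Because every tower uses `padding="causal"`, each preserves the temporal axis, so concatenating along the channel axis yields 32 x 3 = 96 features per time step. A minimal shape check with hypothetical dimensions (assumes TensorFlow is installed and the script runs from the project root):

```python
# Hypothetical shape check for the three-tower concatenation.
from models.approach3_multiscale import build_multiscale_cnn_lstm

model = build_multiscale_cnn_lstm(input_shape=(30, 12), n_classes=6)
concat = model.get_layer("tower_concat").output
print(concat.shape)        # (None, 30, 96): causal padding keeps all 30 steps
print(model.output_shape)  # (None, 6): five ETFs plus CASH
```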
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/strategy/backtest.py ADDED
@@ -0,0 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ strategy/backtest.py
3
+ Strategy execution, performance metrics, and benchmark calculations.
4
+ Supports CASH as a class (earns T-bill rate when selected).
5
+ """
6
+
7
+ import numpy as np
8
+ import pandas as pd
9
+ from datetime import datetime
10
+
11
+
12
+ # ── Strategy execution ────────────────────────────────────────────────────────
13
+
14
+ def execute_strategy(
15
+ preds: np.ndarray,
16
+     proba: np.ndarray,
+     y_raw_test: np.ndarray,
+     test_dates: pd.DatetimeIndex,
+     target_etfs: list,
+     fee_bps: int,
+     tbill_rate: float,
+     include_cash: bool = True,
+ ) -> dict:
+     """
+     Execute strategy from model predictions.
+
+     Args:
+         preds       : [n] integer class predictions
+         proba       : [n, n_classes] softmax probabilities
+         y_raw_test  : [n, n_etfs] actual next-day ETF returns
+         test_dates  : DatetimeIndex aligned with y_raw_test
+         target_etfs : list of ETF return column names, e.g. ["TLT_Ret", ...]
+         fee_bps     : transaction fee in basis points
+         tbill_rate  : annualised 3-month T-bill rate (e.g. 0.045)
+         include_cash: whether CASH is a valid class (index = n_etfs)
+
+     Returns:
+         dict with keys:
+             strat_rets, cum_returns, ann_return, sharpe,
+             hit_ratio, max_dd, max_daily_dd, cum_max,
+             audit_trail, next_signal, next_proba
+     """
+     n_etfs = len(target_etfs)
+     daily_tbill = tbill_rate / 252
+     today = datetime.now().date()
+
+     strat_rets = []
+     audit_trail = []
+
+     for i, cls in enumerate(preds):
+         if include_cash and cls == n_etfs:
+             signal_etf = "CASH"
+             realized_ret = daily_tbill
+         else:
+             cls = min(cls, n_etfs - 1)
+             signal_etf = target_etfs[cls].replace("_Ret", "")
+             realized_ret = float(y_raw_test[i][cls])
+
+         net_ret = realized_ret - (fee_bps / 10000)
+         strat_rets.append(net_ret)
+
+         trade_date = test_dates[i]
+         if trade_date.date() < today:
+             audit_trail.append({
+                 "Date": trade_date.strftime("%Y-%m-%d"),
+                 "Signal": signal_etf,
+                 "Realized": realized_ret,
+                 "Net_Return": net_ret,
+             })
+
+     strat_rets = np.array(strat_rets, dtype=np.float64)
+
+     # Next signal (last prediction)
+     last_cls = int(preds[-1])
+     next_proba = proba[-1]
+
+     if include_cash and last_cls == n_etfs:
+         next_signal = "CASH"
+     else:
+         last_cls = min(last_cls, n_etfs - 1)
+         next_signal = target_etfs[last_cls].replace("_Ret", "")
+
+     metrics = _compute_metrics(strat_rets, tbill_rate)
+
+     return {
+         **metrics,
+         "strat_rets": strat_rets,
+         "audit_trail": audit_trail,
+         "next_signal": next_signal,
+         "next_proba": next_proba,
+     }
+
+
+ # ── Performance metrics ───────────────────────────────────────────────────────
+
+ def _compute_metrics(strat_rets: np.ndarray, tbill_rate: float) -> dict:
+     if len(strat_rets) == 0:
+         return {}
+
+     cum_returns = np.cumprod(1 + strat_rets)
+     n = len(strat_rets)
+     ann_return = float(cum_returns[-1] ** (252 / n) - 1)
+
+     excess = strat_rets - tbill_rate / 252
+     sharpe = float(np.mean(excess) / (np.std(strat_rets) + 1e-9) * np.sqrt(252))
+
+     recent = strat_rets[-15:]
+     hit_ratio = float(np.mean(recent > 0))
+
+     cum_max = np.maximum.accumulate(cum_returns)
+     drawdown = (cum_returns - cum_max) / cum_max
+     max_dd = float(np.min(drawdown))
+     max_daily = float(np.min(strat_rets))
+
+     return {
+         "cum_returns": cum_returns,
+         "ann_return": ann_return,
+         "sharpe": sharpe,
+         "hit_ratio": hit_ratio,
+         "max_dd": max_dd,
+         "max_daily_dd": max_daily,
+         "cum_max": cum_max,
+     }
+
+
+ def compute_benchmark_metrics(returns: np.ndarray, tbill_rate: float) -> dict:
+     """Compute metrics for a benchmark return series."""
+     return _compute_metrics(returns, tbill_rate)
+
+
+ # ── Winner selection ──────────────────────────────────────────────────────────
+
+ def select_winner(results: dict) -> str:
+     """
+     Given a dict of {approach_name: result_dict}, return the approach name
+     with the highest annualised return (raw, not risk-adjusted).
+
+     Args:
+         results : {"Approach 1": {...}, "Approach 2": {...}, "Approach 3": {...}}
+
+     Returns:
+         winner_name : str
+     """
+     best_name = None
+     best_return = -np.inf
+
+     for name, res in results.items():
+         if res is None:
+             continue
+         ret = res.get("ann_return", -np.inf)
+         if ret > best_return:
+             best_return = ret
+             best_name = name
+
+     return best_name
+
+
+ # ── Comparison table ──────────────────────────────────────────────────────────
+
+ def build_comparison_table(results: dict, winner_name: str) -> pd.DataFrame:
+     """
+     Build a summary DataFrame comparing all three approaches.
+
+     Args:
+         results     : {name: result_dict}
+         winner_name : name of the winning approach
+
+     Returns:
+         pd.DataFrame with one row per approach
+     """
+     rows = []
+     for name, res in results.items():
+         if res is None:
+             rows.append({
+                 "Approach": name,
+                 "Ann. Return": "N/A",
+                 "Sharpe": "N/A",
+                 "Hit Ratio (15d)": "N/A",
+                 "Max Drawdown": "N/A",
+                 "Winner": "",
+             })
+             continue
+
+         rows.append({
+             "Approach": name,
+             "Ann. Return": f"{res['ann_return']*100:.2f}%",
+             "Sharpe": f"{res['sharpe']:.2f}",
+             "Hit Ratio (15d)": f"{res['hit_ratio']*100:.0f}%",
+             "Max Drawdown": f"{res['max_dd']*100:.2f}%",
+             "Winner": "⭐ WINNER" if name == winner_name else "",
+         })
+
+     return pd.DataFrame(rows)
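For orientation, a minimal sketch of how these pieces chain together; the toy predictions, probabilities, and returns below are made up for illustration, not taken from the app:

    import numpy as np
    import pandas as pd

    # Hypothetical test window: 5 days, 2 ETFs plus CASH (class index 2).
    preds = np.array([0, 1, 2, 0, 1])
    proba = np.full((5, 3), 1 / 3)                           # flat, low-conviction probabilities
    y_raw = np.random.default_rng(0).normal(0, 0.01, (5, 2))  # fake next-day ETF returns
    dates = pd.bdate_range("2024-01-02", periods=5)

    res = run_strategy(preds, proba, y_raw, dates,
                       target_etfs=["TLT_Ret", "SPY_Ret"],
                       fee_bps=2, tbill_rate=0.045)
    winner = select_winner({"Approach 1": res})
    print(build_comparison_table({"Approach 1": res}, winner))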
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/signals/conviction.py ADDED
@@ -0,0 +1,93 @@
+ """
+ signals/conviction.py
+ Signal conviction scoring via Z-score of model probabilities.
+ """
+
+ import numpy as np
+
+
+ CONVICTION_THRESHOLDS = {
+     "Very High": 2.0,
+     "High": 1.0,
+     "Moderate": 0.0,
+     # Below 0.0 → "Low"
+ }
+
+
+ def compute_conviction(proba: np.ndarray, target_etfs: list, include_cash: bool = True) -> dict:
+     """
+     Compute Z-score conviction for the selected signal.
+
+     Args:
+         proba       : 1-D softmax probability vector [n_classes]
+         target_etfs : list of ETF return column names (e.g. ["TLT_Ret", ...])
+         include_cash: whether CASH is the last class
+
+     Returns:
+         dict with keys:
+             best_idx     : int
+             best_name    : str (ETF ticker or "CASH")
+             z_score      : float
+             label        : str ("Very High" / "High" / "Moderate" / "Low")
+             scores       : np.ndarray (raw proba)
+             etf_names    : list of display names
+             sorted_pairs : list of (name, score) sorted high→low
+     """
+     scores = np.array(proba, dtype=float)
+     best_idx = int(np.argmax(scores))
+     n_etfs = len(target_etfs)
+
+     # Display names
+     etf_names = [e.replace("_Ret", "") for e in target_etfs]
+     if include_cash:
+         etf_names = etf_names + ["CASH"]
+
+     best_name = etf_names[best_idx] if best_idx < len(etf_names) else "CASH"
+
+     # Z-score of the winning probability against all class probabilities
+     mean = np.mean(scores)
+     std = np.std(scores)
+     z = float((scores[best_idx] - mean) / std) if std > 1e-9 else 0.0
+
+     # Label: thresholds are checked from highest to lowest (insertion order)
+     label = "Low"
+     for lbl, threshold in CONVICTION_THRESHOLDS.items():
+         if z >= threshold:
+             label = lbl
+             break
+
+     # Sorted pairs for UI bar chart
+     sorted_pairs = sorted(
+         zip(etf_names, scores),
+         key=lambda x: x[1],
+         reverse=True,
+     )
+
+     return {
+         "best_idx": best_idx,
+         "best_name": best_name,
+         "z_score": z,
+         "label": label,
+         "scores": scores,
+         "etf_names": etf_names,
+         "sorted_pairs": sorted_pairs,
+     }
+
+
+ def conviction_color(label: str) -> str:
+     """Return hex accent colour for a conviction label."""
+     return {
+         "Very High": "#00b894",
+         "High": "#00cec9",
+         "Moderate": "#fdcb6e",
+         "Low": "#d63031",
+     }.get(label, "#888888")
+
+
+ def conviction_icon(label: str) -> str:
+     """Return a traffic-light icon for a conviction label."""
+     return {
+         "Very High": "🟢",
+         "High": "🟢",
+         "Moderate": "🟡",
+         "Low": "🔴",
+     }.get(label, "⚪")
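The Z-score arithmetic is easy to verify by hand. A small sketch against compute_conviction as defined above (the probability vector is invented):

    import numpy as np

    proba = np.array([0.55, 0.20, 0.15, 0.10])   # TLT, IEF, SPY, CASH
    out = compute_conviction(proba, ["TLT_Ret", "IEF_Ret", "SPY_Ret"])

    # mean = 0.25, std ≈ 0.1768, so z ≈ (0.55 - 0.25) / 0.1768 ≈ 1.70 → "High"
    print(out["best_name"], round(out["z_score"], 2), out["label"])   # TLT 1.7 High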
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/ui/components.py ADDED
@@ -0,0 +1,229 @@
+ """
+ ui/components.py
+ Reusable Streamlit UI blocks:
+ - Freshness warning banner
+ - Next trading day signal banner
+ - Signal conviction panel
+ - Metrics row
+ - Audit trail table
+ - Comparison summary table
+ """
+
+ import streamlit as st
+ import pandas as pd
+ import numpy as np
+
+ from signals.conviction import conviction_color, conviction_icon
+
+
+ # ── Freshness warning ─────────────────────────────────────────────────────────
+
+ def show_freshness_status(freshness: dict):
+     """Display data freshness status: success banner if fresh, warning otherwise."""
+     if freshness.get("fresh"):
+         st.success(freshness["message"])
+     else:
+         st.warning(freshness["message"])
+
+
+ # ── Next trading day banner ───────────────────────────────────────────────────
+
+ def show_signal_banner(next_signal: str, next_date, approach_name: str):
+     """Large coloured banner showing the winning approach's next signal."""
+     is_cash = next_signal == "CASH"
+     bg = "linear-gradient(135deg, #2d3436 0%, #1a1a2e 100%)" if is_cash else \
+          "linear-gradient(135deg, #00d1b2 0%, #00a896 100%)"
+
+     st.markdown(f"""
+     <div style="background:{bg}; padding:25px; border-radius:15px;
+                 text-align:center; box-shadow:0 8px 16px rgba(0,0,0,0.3);
+                 margin:16px 0;">
+         <div style="color:rgba(255,255,255,0.7); font-size:12px;
+                     letter-spacing:3px; margin-bottom:6px;">
+             {approach_name.upper()} · NEXT TRADING DAY SIGNAL
+         </div>
+         <h1 style="color:white; font-size:44px; margin:0 0 8px 0;
+                    font-weight:800; text-shadow:2px 2px 4px rgba(0,0,0,0.3);">
+             🎯 {next_date.strftime('%Y-%m-%d')} → {next_signal}
+         </h1>
+     </div>
+     """, unsafe_allow_html=True)
+
+
+ # ── Signal conviction panel ───────────────────────────────────────────────────
+
+ def show_conviction_panel(conviction: dict):
+     """
+     White-background conviction panel with Z-score gauge and per-ETF bars.
+     Uses separate st.markdown calls per ETF row to avoid Streamlit HTML escaping.
+     """
+     label = conviction["label"]
+     z_score = conviction["z_score"]
+     best_name = conviction["best_name"]
+     sorted_pairs = conviction["sorted_pairs"]
+
+     color = conviction_color(label)
+     icon = conviction_icon(label)
+
+     z_clipped = max(-3.0, min(3.0, z_score))
+     bar_pct = int((z_clipped + 3) / 6 * 100)
+
+     max_score = max(s for _, s in sorted_pairs) if sorted_pairs else 1.0
+     if max_score <= 0:
+         max_score = 1.0
+
+     # ── Header + gauge ────────────────────────────────────────────────────────
+     st.markdown(f"""
+     <div style="background:#ffffff; border:1px solid #ddd;
+                 border-left:5px solid {color}; border-radius:12px 12px 0 0;
+                 padding:18px 24px 12px 24px; margin:12px 0 0 0;
+                 box-shadow:0 2px 8px rgba(0,0,0,0.07);">
+
+         <div style="display:flex; align-items:center; gap:12px;
+                     margin-bottom:14px; flex-wrap:wrap;">
+             <span style="font-size:20px;">{icon}</span>
+             <span style="font-size:18px; font-weight:700; color:#1a1a1a;">Signal Conviction</span>
+             <span style="background:#f0f0f0; border:1px solid {color};
+                          color:{color}; font-weight:700; font-size:14px;
+                          padding:3px 12px; border-radius:8px;">
+                 Z = {z_score:.2f} &sigma;
+             </span>
+             <span style="margin-left:auto; background:{color}; color:#fff;
+                          font-weight:700; padding:4px 16px;
+                          border-radius:20px; font-size:13px;">
+                 {label}
+             </span>
+         </div>
+
+         <div style="display:flex; justify-content:space-between;
+                     font-size:11px; color:#999; margin-bottom:4px;">
+             <span>Weak &minus;3&sigma;</span>
+             <span>Neutral 0&sigma;</span>
+             <span>Strong +3&sigma;</span>
+         </div>
+         <div style="background:#f0f0f0; border-radius:8px; height:14px;
+                     overflow:hidden; position:relative; border:1px solid #e0e0e0;
+                     margin-bottom:14px;">
+             <div style="position:absolute; left:50%; top:0; width:2px;
+                         height:100%; background:#ccc;"></div>
+             <div style="width:{bar_pct}%; height:100%;
+                         background:linear-gradient(90deg,#fab1a0,{color});
+                         border-radius:8px;"></div>
+         </div>
+
+         <div style="font-size:12px; color:#999; margin-bottom:2px;">
+             Model probability by ETF (ranked high &rarr; low):
+         </div>
+     </div>
+     """, unsafe_allow_html=True)
+
+     # ── Per-ETF rows ──────────────────────────────────────────────────────────
+     for i, (name, score) in enumerate(sorted_pairs):
+         is_winner = (name == best_name)
+         is_last = (i == len(sorted_pairs) - 1)
+         bar_w = int(score / max_score * 100)
+         name_style = "font-weight:700; color:#00897b;" if is_winner else "color:#444;"
+         bar_color = color if is_winner else "#b2dfdb" if score > max_score * 0.5 else "#e0e0e0"
+         star = " ★" if is_winner else ""
+         bottom_r = "0 0 12px 12px" if is_last else "0"
+         border_bot = "border-bottom:1px solid #f0f0f0;" if not is_last else ""
+
+         st.markdown(f"""
+         <div style="background:#ffffff; border:1px solid #ddd; border-top:none;
+                     border-radius:{bottom_r}; padding:7px 24px; {border_bot}
+                     box-shadow:0 2px 8px rgba(0,0,0,0.07);">
+             <div style="display:flex; align-items:center; gap:12px;">
+                 <span style="width:44px; text-align:right; font-size:13px; {name_style}">{name}{star}</span>
+                 <div style="flex:1; background:#f5f5f5; border-radius:4px;
+                             height:14px; overflow:hidden; border:1px solid #e8e8e8;">
+                     <div style="width:{bar_w}%; height:100%;
+                                 background:{bar_color}; border-radius:4px;"></div>
+                 </div>
+                 <span style="width:56px; font-size:12px; color:#888; text-align:right;">{score:.4f}</span>
+             </div>
+         </div>
+         """, unsafe_allow_html=True)
+
+     st.caption(
+         "Z-score = std deviations the top ETF's probability sits above the mean of all ETF probabilities. "
+         "Higher → model is more decisive."
+     )
+
+
+ # ── Metrics row ───────────────────────────────────────────────────────────────
+
+ def show_metrics_row(result: dict, tbill_rate: float):
+     """Five-column metric display."""
+     col1, col2, col3, col4, col5 = st.columns(5)
+
+     col1.metric(
+         "📈 Annualised Return",
+         f"{result['ann_return']*100:.2f}%",
+         delta=f"vs T-bill: {(result['ann_return'] - tbill_rate)*100:.2f}%",
+     )
+     col2.metric(
+         "📊 Sharpe Ratio",
+         f"{result['sharpe']:.2f}",
+         delta="Risk-Adjusted" if result['sharpe'] > 1 else "Below Threshold",
+     )
+     col3.metric(
+         "🎯 Hit Ratio (15d)",
+         f"{result['hit_ratio']*100:.0f}%",
+         delta="Strong" if result['hit_ratio'] > 0.6 else "Weak",
+     )
+     col4.metric(
+         "📉 Max Drawdown",
+         f"{result['max_dd']*100:.2f}%",
+         delta="Peak to Trough",
+     )
+     col5.metric(
+         "⚠️ Max Daily DD",
+         f"{result['max_daily_dd']*100:.2f}%",
+         delta="Worst Day",
+     )
+
+
+ # ── Comparison table ──────────────────────────────────────────────────────────
+
+ def show_comparison_table(comparison_df: pd.DataFrame):
+     """Styled comparison table for all three approaches."""
+     def highlight_winner(row):
+         if "WINNER" in str(row.get("Winner", "")):
+             return ["background-color: rgba(0,200,150,0.15); font-weight:bold"] * len(row)
+         return [""] * len(row)
+
+     styled = comparison_df.style.apply(highlight_winner, axis=1).set_properties(**{
+         "text-align": "center",
+         "font-size": "14px",
+     }).set_table_styles([
+         {"selector": "th", "props": [("font-size", "14px"), ("font-weight", "bold"),
+                                      ("text-align", "center")]},
+         {"selector": "td", "props": [("padding", "10px")]},
+     ])
+     st.dataframe(styled, use_container_width=True)
+
+
+ # ── Audit trail ───────────────────────────────────────────────────────────────
+
+ def show_audit_trail(audit_trail: list):
+     """Last 20 days styled audit trail."""
+     if not audit_trail:
+         st.info("No audit trail data available.")
+         return
+
+     df = pd.DataFrame(audit_trail).tail(20)[["Date", "Signal", "Net_Return"]]
+
+     def color_return(val):
+         return "color: #00c896; font-weight:bold" if val > 0 else "color: #ff4b4b; font-weight:bold"
+
+     styled = df.style.applymap(color_return, subset=["Net_Return"]).format(
+         {"Net_Return": "{:.2%}"}
+     ).set_properties(**{
+         "font-size": "16px",
+         "text-align": "center",
+     }).set_table_styles([
+         {"selector": "th", "props": [("font-size", "16px"), ("font-weight", "bold"),
+                                      ("text-align", "center")]},
+         {"selector": "td", "props": [("padding", "10px")]},
+     ])
+     st.dataframe(styled, use_container_width=True, height=500)
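The gauge in show_conviction_panel maps a Z-score clipped to [-3, +3] linearly onto a 0-100 % bar width. A quick standalone check of that arithmetic (extracted here purely for illustration):

    def z_to_bar_pct(z: float) -> int:
        z_clipped = max(-3.0, min(3.0, z))
        return int((z_clipped + 3) / 6 * 100)

    assert z_to_bar_pct(0.0) == 50      # neutral signal sits on the midline
    assert z_to_bar_pct(3.0) == 100     # maximally decisive
    assert z_to_bar_pct(-5.0) == 0      # clipped at the weak end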
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/ui/charts.py ADDED
@@ -0,0 +1,144 @@
+ """
+ ui/charts.py
+ All Plotly chart builders for the Streamlit UI.
+ """
+
+ import numpy as np
+ import pandas as pd
+ import plotly.graph_objects as go
+
+
+ APPROACH_COLOURS = {
+     "Approach 1": "#00ffc8",
+     "Approach 2": "#7c6aff",
+     "Approach 3": "#ff6b6b",
+ }
+ BENCHMARK_COLOURS = {
+     "SPY": "#ff4b4b",
+     "AGG": "#ffa500",
+ }
+
+
+ def equity_curve_chart(
+     results: dict,
+     winner_name: str,
+     plot_dates: pd.DatetimeIndex,
+     df: pd.DataFrame,
+     test_slice: slice,
+     tbill_rate: float,
+ ) -> go.Figure:
+     """
+     Equity curve chart showing all three approaches + SPY + AGG benchmarks.
+
+     Args:
+         results     : {approach_name: result_dict}
+         winner_name : highlighted approach
+         plot_dates  : DatetimeIndex for x-axis
+         df          : full DataFrame (for benchmark columns)
+         test_slice  : slice object to extract test-period benchmark returns
+         tbill_rate  : for benchmark metric calculation
+     """
+     from strategy.backtest import compute_benchmark_metrics
+
+     fig = go.Figure()
+
+     # ── Strategy lines ────────────────────────────────────────────────────────
+     for name, res in results.items():
+         if res is None:
+             continue
+         colour = APPROACH_COLOURS.get(name, "#aaaaaa")
+         width = 3 if name == winner_name else 1.5
+         dash = "solid" if name == winner_name else "dot"
+
+         n = min(len(res["cum_returns"]), len(plot_dates))
+
+         fig.add_trace(go.Scatter(
+             x=plot_dates[:n],
+             y=res["cum_returns"][:n],
+             mode="lines",
+             name=f"{name} {'★' if name == winner_name else ''}",
+             line=dict(color=colour, width=width, dash=dash),
+             fill="tozeroy" if name == winner_name else None,
+             fillcolor=f"rgba({_hex_to_rgb(colour)},0.07)" if name == winner_name else None,
+         ))
+
+     # ── Benchmark: SPY ────────────────────────────────────────────────────────
+     if "SPY_Ret" in df.columns:
+         spy_rets = df["SPY_Ret"].iloc[test_slice].values
+         n = min(len(spy_rets), len(plot_dates))
+         spy_m = compute_benchmark_metrics(spy_rets[:n], tbill_rate)
+         fig.add_trace(go.Scatter(
+             x=plot_dates[:n],
+             y=spy_m["cum_returns"],
+             mode="lines",
+             name="SPY (Equity BM)",
+             line=dict(color=BENCHMARK_COLOURS["SPY"], width=1.5, dash="dot"),
+         ))
+
+     # ── Benchmark: AGG ────────────────────────────────────────────────────────
+     if "AGG_Ret" in df.columns:
+         agg_rets = df["AGG_Ret"].iloc[test_slice].values
+         n = min(len(agg_rets), len(plot_dates))
+         agg_m = compute_benchmark_metrics(agg_rets[:n], tbill_rate)
+         fig.add_trace(go.Scatter(
+             x=plot_dates[:n],
+             y=agg_m["cum_returns"],
+             mode="lines",
+             name="AGG (Bond BM)",
+             line=dict(color=BENCHMARK_COLOURS["AGG"], width=1.5, dash="dot"),
+         ))
+
+     fig.update_layout(
+         template="plotly_dark",
+         height=460,
+         hovermode="x unified",
+         showlegend=True,
+         legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01, font=dict(size=11)),
+         xaxis_title="Date",
+         yaxis_title="Cumulative Return (×)",
+         margin=dict(l=50, r=30, t=20, b=50),
+     )
+     return fig
+
+
+ def comparison_bar_chart(results: dict, winner_name: str) -> go.Figure:
+     """
+     Horizontal bar chart comparing annualised returns across all three approaches.
+     """
+     names = []
+     returns = []
+     colours = []
+
+     for name, res in results.items():
+         if res is None:
+             continue
+         names.append(name)
+         returns.append(res["ann_return"] * 100)
+         colours.append(APPROACH_COLOURS.get(name, "#aaaaaa"))
+
+     fig = go.Figure(go.Bar(
+         x=returns,
+         y=names,
+         orientation="h",
+         marker_color=colours,
+         text=[f"{r:.1f}%" for r in returns],
+         textposition="auto",
+     ))
+
+     fig.update_layout(
+         template="plotly_dark",
+         height=200,
+         xaxis_title="Annualised Return (%)",
+         margin=dict(l=100, r=30, t=10, b=40),
+         showlegend=False,
+     )
+     return fig
+
+
+ # ── Helper ────────────────────────────────────────────────────────────────────
+
+ def _hex_to_rgb(hex_color: str) -> str:
+     """Convert #rrggbb to 'r,g,b' string for rgba()."""
+     h = hex_color.lstrip("#")
+     r, g, b = int(h[0:2], 16), int(h[2:4], 16), int(h[4:6], 16)
+     return f"{r},{g},{b}"
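A quick check of the helper and the translucent fill string equity_curve_chart builds from it (colour taken from APPROACH_COLOURS above):

    assert _hex_to_rgb("#00ffc8") == "0,255,200"
    fill = f"rgba({_hex_to_rgb('#00ffc8')},0.07)"
    print(fill)   # rgba(0,255,200,0.07)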
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/utils/calendar.py ADDED
@@ -0,0 +1,91 @@
+ """
+ utils/calendar.py
+ NYSE calendar utilities:
+ - Next trading day for signal display
+ - Market open check
+ - EST time helper
+ """
+
+ from datetime import date, datetime, timedelta
+ import pytz
+
+ try:
+     import pandas_market_calendars as mcal
+     NYSE_CAL_AVAILABLE = True
+ except ImportError:
+     NYSE_CAL_AVAILABLE = False
+
+
+ def get_est_time() -> datetime:
+     """Return current datetime in US/Eastern timezone."""
+     return datetime.now(pytz.timezone("US/Eastern"))
+
+
+ def is_market_open_today() -> bool:
+     """Return True if today is a NYSE trading day."""
+     today = get_est_time().date()
+     if NYSE_CAL_AVAILABLE:
+         try:
+             nyse = mcal.get_calendar("NYSE")
+             schedule = nyse.schedule(start_date=today, end_date=today)
+             return len(schedule) > 0
+         except Exception:
+             pass
+     return today.weekday() < 5
+
+
+ def get_next_signal_date() -> date:
+     """
+     Determine the date for which the model's signal applies.
+
+     Rules:
+     - If today is a NYSE trading day AND it is before 09:30 EST
+       → signal applies to TODAY (market hasn't opened yet)
+     - Otherwise
+       → signal applies to the NEXT NYSE trading day
+     """
+     now_est = get_est_time()
+     today = now_est.date()
+
+     market_not_open_yet = (
+         now_est.hour < 9 or
+         (now_est.hour == 9 and now_est.minute < 30)
+     )
+
+     if NYSE_CAL_AVAILABLE:
+         try:
+             nyse = mcal.get_calendar("NYSE")
+             schedule = nyse.schedule(
+                 start_date=today,
+                 end_date=today + timedelta(days=10),
+             )
+             if len(schedule) == 0:
+                 return today  # fallback
+
+             first_day = schedule.index[0].date()
+
+             # Today is a trading day and market hasn't opened → today
+             if first_day == today and market_not_open_yet:
+                 return today
+
+             # Otherwise find first trading day strictly after today
+             for ts in schedule.index:
+                 d = ts.date()
+                 if d > today:
+                     return d
+
+             return schedule.index[-1].date()
+         except Exception:
+             pass
+
+     # Fallback: simple weekend skip
+     candidate = today if market_not_open_yet else today + timedelta(days=1)
+     while candidate.weekday() >= 5:
+         candidate += timedelta(days=1)
+     return candidate
+
+
+ def is_sync_window() -> bool:
+     """True if current EST time is in the 07:00-08:00 or 19:00-20:00 window."""
+     now = get_est_time()
+     return (7 <= now.hour < 8) or (19 <= now.hour < 20)
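The pure-Python weekend-skip fallback is worth a sanity check; this sketch reimplements just that branch so it runs without pandas_market_calendars (dates chosen arbitrarily; 2024-01-05 is a Friday):

    from datetime import date, timedelta

    def next_weekday(today: date, market_not_open_yet: bool) -> date:
        candidate = today if market_not_open_yet else today + timedelta(days=1)
        while candidate.weekday() >= 5:   # 5 = Saturday, 6 = Sunday
            candidate += timedelta(days=1)
        return candidate

    assert next_weekday(date(2024, 1, 5), False) == date(2024, 1, 8)  # after the open → Monday
    assert next_weekday(date(2024, 1, 5), True) == date(2024, 1, 5)   # before 09:30 EST → same day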
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/__init__.py ADDED
@@ -0,0 +1 @@
+
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/signals/__init__.py ADDED
@@ -0,0 +1 @@
+
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/strategy/__init__.py ADDED
@@ -0,0 +1 @@
+
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/ui/__init__.py ADDED
@@ -0,0 +1 @@
+
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/utils/__init__.py ADDED
@@ -0,0 +1 @@
+
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/models/__init__.py ADDED
@@ -0,0 +1 @@
+ # models package
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/signals/__init__.py CHANGED
@@ -1 +1 @@
-
+ # strategy package
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/strategy/__init__.py CHANGED
@@ -1 +1 @@
-
+ # strategy package
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/signals/__init__.py CHANGED
@@ -1 +1 @@
- # strategy package
+ # signals package
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/ui/__init__.py CHANGED
@@ -1 +1 @@
-
+ # ui package
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/utils/__init__.py CHANGED
@@ -1 +1 @@
-
+ # utils package
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach2_regime.py CHANGED
@@ -1 +1,217 @@
+ """
+ models/approach2_regime.py
+ Approach 2: Regime-Conditioned CNN-LSTM
 
+ Pipeline:
+     Raw macro signals
+     -> CNN Tower (64 filters, k=3) -> feature vector
+     -> Regime Classifier (HMM on VIX + HY spread + T10Y2Y) -> one-hot [4]
+     -> Concatenate CNN features + regime embedding
+     -> LSTM (128 units)
+     -> Dense 64 -> Softmax (n_etfs + 1 CASH)
+
+ NOTE: tensorflow and hmmlearn are imported lazily inside functions
+ to prevent module-level import failures from making this module
+ appear broken to Python's import system.
+ """
+
+ import numpy as np
+
+ N_REGIMES = 4
+ REGIME_HINTS = ["VIX", "HY", "Spread", "T10Y2Y", "T10Y3M", "Credit"]
+
+
+ # ---------------------------------------------------------------------------
+ # Regime detection helpers
+ # ---------------------------------------------------------------------------
+
+ def _get_regime_cols(feature_names: list) -> list:
+     return [
+         f for f in feature_names
+         if any(hint.lower() in f.lower() for hint in REGIME_HINTS)
+     ]
+
+
+ def fit_regime_model(X_flat: np.ndarray, feature_names: list,
+                      n_regimes: int = N_REGIMES):
+     """
+     Fit a Gaussian HMM on regime-relevant macro features.
+     Returns (hmm_model, regime_cols_idx).
+     hmm_model is None if hmmlearn is unavailable or fitting fails.
+     """
+     regime_col_names = _get_regime_cols(feature_names)
+     if not regime_col_names:
+         regime_col_names = feature_names[:min(3, len(feature_names))]
+
+     regime_cols_idx = [
+         feature_names.index(c) for c in regime_col_names
+         if c in feature_names
+     ]
+     X_regime = X_flat[:, regime_cols_idx]
+
+     try:
+         from hmmlearn.hmm import GaussianHMM
+         hmm = GaussianHMM(
+             n_components=n_regimes,
+             covariance_type="diag",
+             n_iter=100,
+             random_state=42,
+         )
+         hmm.fit(X_regime)
+         return hmm, regime_cols_idx
+     except Exception as e:
+         print(f"[Approach 2] HMM fitting failed: {e}. Using fallback.")
+         return None, regime_cols_idx
+
+
+ def predict_regimes(hmm_model, X_flat: np.ndarray,
+                     regime_cols_idx: list,
+                     n_regimes: int = N_REGIMES) -> np.ndarray:
+     """Predict integer regime label for each day."""
+     X_regime = X_flat[:, regime_cols_idx]
+
+     if hmm_model is not None:
+         try:
+             return hmm_model.predict(X_regime)
+         except Exception:
+             pass
+
+     # Fallback: quantile binning on first regime feature
+     feat = X_regime[:, 0]
+     quantiles = np.percentile(feat, np.linspace(0, 100, n_regimes + 1))
+     return np.digitize(feat, quantiles[1:-1]).astype(int)
+
+
+ def regimes_to_onehot(regimes: np.ndarray,
+                       n_regimes: int = N_REGIMES) -> np.ndarray:
+     one_hot = np.zeros((len(regimes), n_regimes), dtype=np.float32)
+     for i, r in enumerate(regimes):
+         one_hot[i, min(int(r), n_regimes - 1)] = 1.0
+     return one_hot
+
+
+ def build_regime_sequences(X_seq: np.ndarray,
+                            regimes_flat: np.ndarray,
+                            lookback: int) -> np.ndarray:
+     n_samples = X_seq.shape[0]
+     aligned = regimes_flat[lookback: lookback + n_samples]
+     return regimes_to_onehot(aligned)
+
+
+ # ---------------------------------------------------------------------------
+ # Model builder
+ # ---------------------------------------------------------------------------
+
+ def build_regime_cnn_lstm(seq_input_shape: tuple,
+                           n_classes: int,
+                           n_regimes: int = N_REGIMES,
+                           dropout: float = 0.3,
+                           lstm_units: int = 128):
+     """Build and compile the regime-conditioned CNN-LSTM model."""
+     from tensorflow import keras
+     from models.base import classification_head
+
+     seq_input = keras.Input(shape=seq_input_shape, name="seq_input")
+     x = keras.layers.Conv1D(64, kernel_size=3, padding="causal",
+                             activation="relu")(seq_input)
+     x = keras.layers.BatchNormalization()(x)
+     x = keras.layers.MaxPooling1D(pool_size=2)(x)
+     x = keras.layers.Conv1D(32, kernel_size=3, padding="causal",
+                             activation="relu")(x)
+     x = keras.layers.BatchNormalization()(x)
+     x = keras.layers.Dropout(dropout)(x)
+     cnn_out = keras.layers.GlobalAveragePooling1D()(x)
+
+     regime_input = keras.Input(shape=(n_regimes,), name="regime_input")
+     regime_emb = keras.layers.Dense(8, activation="relu")(regime_input)
+
+     merged = keras.layers.Concatenate()([cnn_out, regime_emb])
+     x = keras.layers.Reshape((1, merged.shape[-1]))(merged)
+     x = keras.layers.LSTM(lstm_units, dropout=dropout)(x)
+
+     outputs = classification_head(x, n_classes, dropout)
+
+     model = keras.Model(
+         inputs=[seq_input, regime_input],
+         outputs=outputs,
+         name="Approach2_Regime_CNN_LSTM",
+     )
+     model.compile(
+         optimizer=keras.optimizers.Adam(learning_rate=1e-3),
+         loss="sparse_categorical_crossentropy",
+         metrics=["accuracy"],
+     )
+     return model
+
+
+ # ---------------------------------------------------------------------------
+ # Training pipeline
+ # ---------------------------------------------------------------------------
+
+ def train_approach2(
+     X_train, y_train,
+     X_val, y_val,
+     X_flat_all: np.ndarray,
+     feature_names: list,
+     lookback: int,
+     train_size: int,
+     val_size: int,
+     n_classes: int,
+     epochs: int = 100,
+     batch_size: int = 32,
+     dropout: float = 0.3,
+     lstm_units: int = 128,
+ ):
+     """
+     Fit HMM regime model then train the regime-conditioned CNN-LSTM.
+     Returns: model, history, hmm_model, regime_cols_idx
+     """
+     from models.base import get_callbacks
+
+     X_flat_train = X_flat_all[:train_size + lookback]
+     hmm_model, regime_cols_idx = fit_regime_model(X_flat_train, feature_names)
+
+     regimes_all = predict_regimes(hmm_model, X_flat_all, regime_cols_idx)
+
+     R_train = build_regime_sequences(X_train, regimes_all, lookback)
+     R_val = build_regime_sequences(X_val, regimes_all, lookback + train_size)
+
+     model = build_regime_cnn_lstm(
+         X_train.shape[1:], n_classes,
+         dropout=dropout, lstm_units=lstm_units,
+     )
+
+     history = model.fit(
+         [X_train, R_train], y_train,
+         validation_data=([X_val, R_val], y_val),
+         epochs=epochs,
+         batch_size=batch_size,
+         callbacks=get_callbacks(),
+         verbose=0,
+     )
+
+     return model, history, hmm_model, regime_cols_idx
+
+
+ # ---------------------------------------------------------------------------
+ # Inference
+ # ---------------------------------------------------------------------------
+
+ def predict_approach2(
+     model,
+     X_test: np.ndarray,
+     X_flat_all: np.ndarray,
+     regime_cols_idx: list,
+     hmm_model,
+     lookback: int,
+     train_size: int,
+     val_size: int,
+ ) -> tuple:
+     """Predict on test set with regime conditioning. Returns (preds, proba)."""
+     regimes_all = predict_regimes(hmm_model, X_flat_all, regime_cols_idx)
+     offset = lookback + train_size + val_size
+     R_test = build_regime_sequences(X_test, regimes_all, offset)
+
+     proba = model.predict([X_test, R_test], verbose=0)
+     preds = np.argmax(proba, axis=1)
+     return preds, proba
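When hmmlearn is unavailable, predict_regimes falls back to quantile binning on the first regime column. A small sketch of that path plus the one-hot encoding (the VIX-like series is synthetic):

    import numpy as np

    vix_like = np.array([12.0, 14.0, 18.0, 25.0, 40.0, 33.0, 16.0, 13.0])
    X_flat = vix_like.reshape(-1, 1)

    regimes = predict_regimes(None, X_flat, regime_cols_idx=[0])  # hmm_model=None → fallback
    onehot = regimes_to_onehot(regimes)
    print(regimes)         # one integer regime per day, 0 (calm) … 3 (stressed)
    print(onehot.shape)    # (8, 4)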
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/base.py CHANGED
@@ -1,18 +1,16 @@
  """
  models/base.py
- Shared utilities for all three CNN-LSTM variants:
- - Data preparation (sequences, train/val/test split)
- - Common Keras layers / callbacks
- - Predict + evaluate helpers
+ Shared utilities for all three CNN-LSTM variants.
+ Key fix: class_weight support to prevent majority-class collapse.
  """
 
  import numpy as np
  import pandas as pd
  from sklearn.preprocessing import RobustScaler
+ from sklearn.utils.class_weight import compute_class_weight
  import tensorflow as tf
  from tensorflow import keras
 
- # ── Reproducibility ───────────────────────────────────────────────────────────
  SEED = 42
  tf.random.set_seed(SEED)
  np.random.seed(SEED)
@@ -23,15 +21,7 @@ np.random.seed(SEED)
  def build_sequences(features: np.ndarray, targets: np.ndarray, lookback: int):
      """
      Build supervised sequences for CNN-LSTM input.
-
-     Args:
-         features : 2-D array [n_days, n_features]
-         targets  : 2-D array [n_days, n_etfs] (raw returns)
-         lookback : number of past days per sample
-
-     Returns:
-         X : [n_samples, lookback, n_features]
-         y : [n_samples, n_etfs] (raw returns for the next day)
+     X[i] = features[i : i+lookback] → predicts y[i+lookback]
      """
      X, y = [], []
      for i in range(lookback, len(features)):
@@ -43,11 +33,9 @@ def build_sequences(features: np.ndarray, targets: np.ndarray, lookback: int):
  # ── Train / val / test split ──────────────────────────────────────────────────
 
  def train_val_test_split(X, y, train_pct=0.70, val_pct=0.15):
-     """Split sequences into train / val / test preserving temporal order."""
-     n = len(X)
+     n = len(X)
      t1 = int(n * train_pct)
      t2 = int(n * (train_pct + val_pct))
-
      return (
          X[:t1], y[:t1],
          X[t1:t2], y[t1:t2],
@@ -58,56 +46,66 @@ def train_val_test_split(X, y, train_pct=0.70, val_pct=0.15):
  # ── Feature scaling ───────────────────────────────────────────────────────────
 
  def scale_features(X_train, X_val, X_test):
-     """
-     Fit RobustScaler on training data only, apply to val and test.
-     Operates on the flattened feature dimension.
-
-     Returns scaled arrays with same shape as inputs.
-     """
-     n_train, lb, n_feat = X_train.shape
-     scaler = RobustScaler()
-
-     # Fit on train
+     n_feat = X_train.shape[2]
+     scaler = RobustScaler()
      scaler.fit(X_train.reshape(-1, n_feat))
 
-     def _transform(X):
-         shape = X.shape
-         return scaler.transform(X.reshape(-1, n_feat)).reshape(shape)
+     def _t(X):
+         s = X.shape
+         return scaler.transform(X.reshape(-1, n_feat)).reshape(s)
 
-     return _transform(X_train), _transform(X_val), _transform(X_test), scaler
+     return _t(X_train), _t(X_val), _t(X_test), scaler
 
 
- # ── Label builder (classification: argmax of returns) ────────────────────────
+ # ── Label builder ─────────────────────────────────────────────────────────────
 
  def returns_to_labels(y_raw, include_cash=True, cash_threshold=0.0):
      """
-     Convert raw return matrix to integer class labels.
-
-     If include_cash=True, adds a CASH class (index = n_etfs) when
-     the best ETF return is below cash_threshold.
-
-     Args:
-         y_raw          : [n_samples, n_etfs]
-         include_cash   : whether to allow CASH class
-         cash_threshold : minimum ETF return to prefer over CASH
-
-     Returns:
-         labels : [n_samples] integer class indices
+     Assign label = argmax(returns).
+     If include_cash and best return < cash_threshold → label = n_etfs (CASH).
      """
      best = np.argmax(y_raw, axis=1)
      if include_cash:
-         best_return = y_raw[np.arange(len(y_raw)), best]
-         cash_idx = y_raw.shape[1]
-         labels = np.where(best_return < cash_threshold, cash_idx, best)
+         best_ret = y_raw[np.arange(len(y_raw)), best]
+         cash_idx = y_raw.shape[1]
+         labels = np.where(best_ret < cash_threshold, cash_idx, best)
      else:
          labels = best
      return labels.astype(np.int32)
 
 
- # ── Common Keras callbacks ────────────────────────────────────────────────────
+ # ── Class weights ─────────────────────────────────────────────────────────────
+
+ def compute_class_weights(y_labels: np.ndarray, n_classes: int) -> dict:
+     """
+     Compute balanced class weights to counteract majority-class collapse.
+     Returns dict {class_index: weight} for use in model.fit().
+     """
+     classes = np.arange(n_classes)
+     present = np.unique(y_labels)
+
+     try:
+         weights = compute_class_weight(
+             class_weight="balanced",
+             classes=present,
+             y=y_labels,
+         )
+         weight_dict = {int(c): float(w) for c, w in zip(present, weights)}
+     except Exception:
+         weight_dict = {}
+
+     # Fill any missing classes with weight 1.0
+     for c in classes:
+         if c not in weight_dict:
+             weight_dict[c] = 1.0
+
+     return weight_dict
+
+
+ # ── Callbacks ─────────────────────────────────────────────────────────────────
 
- def get_callbacks(patience_es=15, patience_lr=8, min_lr=1e-6):
-     """Standard early stopping + reduce-LR callbacks shared by all models."""
+ def get_callbacks(patience_es=20, patience_lr=10, min_lr=1e-6):
+     """Longer patience to allow models time to learn past majority class."""
      return [
          keras.callbacks.EarlyStopping(
              monitor="val_loss",
@@ -125,75 +123,51 @@ def get_callbacks(patience_es=15, patience_lr=8, min_lr=1e-6):
      ]
 
 
- # ── Common output head ────────────────────────────────────────────────────────
+ # ── Output head ───────────────────────────────────────────────────────────────
 
  def classification_head(x, n_classes: int, dropout: float = 0.3):
-     """
-     Shared dense output head for all three CNN-LSTM variants.
-
-     Args:
-         x         : input tensor
-         n_classes : number of ETF classes (+ 1 for CASH if applicable)
-         dropout   : dropout rate
-
-     Returns:
-         output tensor with softmax activation
-     """
      x = keras.layers.Dense(64, activation="relu")(x)
+     x = keras.layers.BatchNormalization()(x)
      x = keras.layers.Dropout(dropout)(x)
+     x = keras.layers.Dense(32, activation="relu")(x)
+     x = keras.layers.Dropout(dropout / 2)(x)
      x = keras.layers.Dense(n_classes, activation="softmax")(x)
      return x
 
 
- # ── Prediction helper ─────────────────────────────────────────────────────────
+ # ── Prediction ────────────────────────────────────────────────────────────────
 
- def predict_classes(model, X_test: np.ndarray) -> np.ndarray:
-     """Return integer class predictions from a Keras model."""
+ def predict_classes(model, X_test: np.ndarray) -> tuple:
      proba = model.predict(X_test, verbose=0)
      return np.argmax(proba, axis=1), proba
 
 
- # ── Metrics helper ────────────────────────────────────────────────────────────
+ # ── Metrics ───────────────────────────────────────────────────────────────────
 
  def evaluate_returns(
-     preds: np.ndarray,
-     proba: np.ndarray,
-     y_raw_test: np.ndarray,
-     target_etfs: list,
-     tbill_rate: float,
-     fee_bps: int,
-     include_cash: bool = True,
+     preds, proba, y_raw_test, target_etfs, tbill_rate, fee_bps, include_cash=True,
  ):
-     """
-     Given integer class predictions and raw return matrix,
-     compute strategy returns and summary metrics.
-
-     Returns:
-         strat_rets  : np.ndarray of daily net returns
-         ann_return  : annualised return (float)
-         cum_returns : cumulative return series
-         last_proba  : probability vector for the last prediction
-         next_etf    : name of ETF predicted for next session
-     """
      n_etfs = len(target_etfs)
-     strat_rets = []
+     daily_tbill = tbill_rate / 252
+     strat_rets = []
 
      for i, cls in enumerate(preds):
          if include_cash and cls == n_etfs:
-             # CASH: earn daily T-bill rate
-             daily_tbill = tbill_rate / 252
-             net = daily_tbill - (fee_bps / 10000)
+             net = daily_tbill - fee_bps / 10000
          else:
-             ret = y_raw_test[i][cls]
-             net = ret - (fee_bps / 10000)
+             cls = min(int(cls), n_etfs - 1)
+             net = float(y_raw_test[i][cls]) - fee_bps / 10000
          strat_rets.append(net)
 
      strat_rets = np.array(strat_rets)
      cum_returns = np.cumprod(1 + strat_rets)
-     ann_return = (cum_returns[-1] ** (252 / len(strat_rets))) - 1
+     ann_return = cum_returns[-1] ** (252 / len(strat_rets)) - 1
 
      last_proba = proba[-1]
      next_cls = int(np.argmax(last_proba))
-     next_etf = "CASH" if (include_cash and next_cls == n_etfs) else target_etfs[next_cls].replace("_Ret", "")
+     next_etf = (
+         "CASH" if (include_cash and next_cls == n_etfs)
+         else target_etfs[min(next_cls, n_etfs - 1)].replace("_Ret", "")
+     )
 
      return strat_rets, ann_return, cum_returns, last_proba, next_etf
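The "balanced" heuristic behind compute_class_weights gives weight_c = n_samples / (n_present_classes × count_c), so rare classes get proportionally larger gradients. A worked example with an invented label vector:

    import numpy as np

    y = np.array([0, 0, 0, 0, 0, 0, 1, 1, 2])   # heavy class-0 majority
    cw = compute_class_weights(y, n_classes=4)

    # 9 / (3 × 6) = 0.5,  9 / (3 × 2) = 1.5,  9 / (3 × 1) = 3.0;
    # absent class 3 is filled with the default 1.0
    print(cw)   # {0: 0.5, 1: 1.5, 2: 3.0, 3: 1.0}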
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach1_wavelet.py CHANGED
@@ -1,167 +1,86 @@
  """
  models/approach1_wavelet.py
  Approach 1: Wavelet Decomposition CNN-LSTM
-
- Pipeline:
-     Raw macro signals
-     → DWT (db4, level=3) per signal → multi-band channel stack
-     → 1D CNN (64 filters, k=3) → MaxPool → (32 filters, k=3)
-     → LSTM (128 units)
-     → Dense 64 → Softmax (n_etfs + 1 CASH)
+ With class weights to prevent majority-class collapse.
  """
 
  import numpy as np
  import pywt
- import tensorflow as tf
- from tensorflow import keras
- from models.base import classification_head, get_callbacks
-
- WAVELET = "db4"
- LEVEL = 3
 
+ WAVELET = "db4"
+ LEVEL = 3
 
- # ── Wavelet feature engineering ───────────────────────────────────────────────
-
  def _wavelet_decompose_signal(signal: np.ndarray, wavelet: str, level: int) -> np.ndarray:
-     """
-     Decompose a 1-D signal into DWT subbands and return them stacked.
-
-     For a signal of length T:
-         coeffs = [cA_n, cD_n, cD_{n-1}, ..., cD_1]
-     We interpolate each subband back to length T so we can stack them.
-
-     Returns: array of shape [T, level+1]
-     """
      T = len(signal)
      coeffs = pywt.wavedec(signal, wavelet, level=level)
      bands = []
      for c in coeffs:
-         # Interpolate back to original length
-         band = np.interp(
-             np.linspace(0, len(c) - 1, T),
-             np.arange(len(c)),
-             c,
-         )
+         band = np.interp(np.linspace(0, len(c)-1, T), np.arange(len(c)), c)
          bands.append(band)
-     return np.stack(bands, axis=-1)  # [T, level+1]
+     return np.stack(bands, axis=-1)
 
 
- def apply_wavelet_transform(X: np.ndarray, wavelet: str = WAVELET, level: int = LEVEL) -> np.ndarray:
-     """
-     Apply DWT to every feature channel across all samples.
-
-     Args:
-         X : [n_samples, lookback, n_features]
-
-     Returns:
-         X_wt : [n_samples, lookback, n_features * (level+1)]
-     """
+ def apply_wavelet_transform(X: np.ndarray, wavelet=WAVELET, level=LEVEL) -> np.ndarray:
      n_samples, lookback, n_features = X.shape
      n_bands = level + 1
      X_wt = np.zeros((n_samples, lookback, n_features * n_bands), dtype=np.float32)
-
      for s in range(n_samples):
          for f in range(n_features):
-             decomposed = _wavelet_decompose_signal(X[s, :, f], wavelet, level)  # [T, n_bands]
+             decomposed = _wavelet_decompose_signal(X[s, :, f], wavelet, level)
              start = f * n_bands
              X_wt[s, :, start: start + n_bands] = decomposed
-
      return X_wt
 
 
- # ── Model builder ─────────────────────────────────────────────────────────────
-
- def build_wavelet_cnn_lstm(
-     input_shape: tuple,
-     n_classes: int,
-     dropout: float = 0.3,
-     lstm_units: int = 128,
- ) -> keras.Model:
-     """
-     Build Wavelet CNN-LSTM model.
-
-     Args:
-         input_shape : (lookback, n_features * n_bands) — post-DWT shape
-         n_classes   : number of output classes (ETFs + CASH)
-         dropout     : dropout rate
-         lstm_units  : LSTM hidden size
-
-     Returns:
-         Compiled Keras model
-     """
-     inputs = keras.Input(shape=input_shape, name="wavelet_input")
-
-     # CNN block 1
-     x = keras.layers.Conv1D(64, kernel_size=3, padding="causal", activation="relu")(inputs)
+ def build_wavelet_cnn_lstm(input_shape, n_classes, dropout=0.3, lstm_units=128):
+     from tensorflow import keras
+     from models.base import classification_head
+
+     inputs = keras.Input(shape=input_shape)
+     x = keras.layers.Conv1D(64, 3, padding="causal", activation="relu")(inputs)
      x = keras.layers.BatchNormalization()(x)
-     x = keras.layers.MaxPooling1D(pool_size=2)(x)
-
-     # CNN block 2
-     x = keras.layers.Conv1D(32, kernel_size=3, padding="causal", activation="relu")(x)
+     x = keras.layers.MaxPooling1D(2)(x)
+     x = keras.layers.Conv1D(32, 3, padding="causal", activation="relu")(x)
      x = keras.layers.BatchNormalization()(x)
      x = keras.layers.Dropout(dropout)(x)
-
-     # LSTM
      x = keras.layers.LSTM(lstm_units, dropout=dropout, recurrent_dropout=0.1)(x)
-
-     # Output head
      outputs = classification_head(x, n_classes, dropout)
 
-     model = keras.Model(inputs, outputs, name="Approach1_Wavelet_CNN_LSTM")
+     model = keras.Model(inputs, outputs, name="Approach1_Wavelet")
      model.compile(
-         optimizer=keras.optimizers.Adam(learning_rate=1e-3),
+         optimizer=keras.optimizers.Adam(1e-3),
          loss="sparse_categorical_crossentropy",
          metrics=["accuracy"],
      )
      return model
 
 
- # ── Full train pipeline ───────────────────────────────────────────────────────
-
  def train_approach1(
-     X_train, y_train,
-     X_val, y_val,
-     n_classes: int,
-     epochs: int = 100,
-     batch_size: int = 32,
-     dropout: float = 0.3,
-     lstm_units: int = 128,
+     X_train, y_train, X_val, y_val,
+     n_classes, epochs=100, batch_size=32, dropout=0.3, lstm_units=128,
  ):
-     """
-     Apply wavelet transform then train the CNN-LSTM.
-
-     Args:
-         X_train/val : [n, lookback, n_features] (scaled, pre-wavelet)
-         y_train/val : [n] integer class labels
-         n_classes   : total output classes
-
-     Returns:
-         model    : trained Keras model
-         history  : training history
-         wt_shape : post-DWT input shape (for inference)
-     """
-     # Apply DWT
-     X_train_wt = apply_wavelet_transform(X_train)
-     X_val_wt = apply_wavelet_transform(X_val)
-
-     input_shape = X_train_wt.shape[1:]  # (lookback, n_features * n_bands)
+     from models.base import get_callbacks, compute_class_weights
+
+     X_train_wt = apply_wavelet_transform(X_train)
+     X_val_wt = apply_wavelet_transform(X_val)
+     input_shape = X_train_wt.shape[1:]
      model = build_wavelet_cnn_lstm(input_shape, n_classes, dropout, lstm_units)
+     cw = compute_class_weights(y_train, n_classes)
 
      history = model.fit(
          X_train_wt, y_train,
          validation_data=(X_val_wt, y_val),
          epochs=epochs,
          batch_size=batch_size,
+         class_weight=cw,
          callbacks=get_callbacks(),
          verbose=0,
      )
-
      return model, history, input_shape
 
 
  def predict_approach1(model, X_test: np.ndarray) -> tuple:
-     """Apply DWT to test set then predict. Returns (class_preds, proba)."""
      X_test_wt = apply_wavelet_transform(X_test)
      proba = model.predict(X_test_wt, verbose=0)
-     preds = np.argmax(proba, axis=1)
-     return preds, proba
+     return np.argmax(proba, axis=1), proba
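The wavelet transform multiplies the channel dimension by level + 1, since each window is decomposed per feature and every subband is interpolated back to the window length. A shape check against apply_wavelet_transform as defined above (random input; db4 at level 3 needs a window of roughly 60+ days to avoid pywt boundary warnings):

    import numpy as np

    X = np.random.default_rng(0).normal(size=(4, 60, 5))   # 4 samples, 60-day lookback, 5 features
    X_wt = apply_wavelet_transform(X)                      # db4, level=3 → 4 bands per feature
    print(X_wt.shape)                                      # (4, 60, 20) = 5 features × 4 bands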
 
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach2_regime.py CHANGED
@@ -166,7 +166,7 @@ def train_approach2(
      Fit HMM regime model then train the regime-conditioned CNN-LSTM.
      Returns: model, history, hmm_model, regime_cols_idx
      """
-     from models.base import get_callbacks
+     from models.base import get_callbacks, compute_class_weights
 
      X_flat_train = X_flat_all[:train_size + lookback]
      hmm_model, regime_cols_idx = fit_regime_model(X_flat_train, feature_names)
@@ -181,11 +181,14 @@ def train_approach2(
          dropout=dropout, lstm_units=lstm_units,
      )
 
+     cw = compute_class_weights(y_train, n_classes)
+
      history = model.fit(
          [X_train, R_train], y_train,
          validation_data=([X_val, R_val], y_val),
          epochs=epochs,
          batch_size=batch_size,
+         class_weight=cw,
          callbacks=get_callbacks(),
          verbose=0,
      )
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach3_multiscale.py CHANGED
@@ -1,150 +1,80 @@
  """
  models/approach3_multiscale.py
  Approach 3: Multi-Scale Parallel CNN-LSTM
-
- Pipeline:
-     Raw macro signals
-     → 3 parallel CNN towers: kernel 3 (short), 7 (medium), 21 (long)
-     → Concatenate [96 features]
-     → LSTM (128 units)
-     → Dense 64 → Softmax (n_etfs + 1 CASH)
+ With class weights to prevent majority-class collapse.
  """
 
  import numpy as np
- import tensorflow as tf
- from tensorflow import keras
- from models.base import classification_head, get_callbacks
-
- # Kernel sizes represent: momentum (3d), weekly cycle (7d), monthly trend (21d)
- KERNEL_SIZES = [3, 7, 21]
- FILTERS_EACH = 32  # 32 × 3 towers = 96 concatenated features
 
+ KERNEL_SIZES = [3, 7, 21]
+ FILTERS_EACH = 32
 
- # ── Model builder ─────────────────────────────────────────────────────────────
 
  def build_multiscale_cnn_lstm(
-     input_shape: tuple,
-     n_classes: int,
-     kernel_sizes: list = None,
-     filters: int = FILTERS_EACH,
-     dropout: float = 0.3,
-     lstm_units: int = 128,
- ) -> keras.Model:
-     """
-     Multi-scale parallel CNN-LSTM.
-
-     Three CNN towers with different kernel sizes run in parallel on the
-     same input, capturing momentum, weekly cycle, and monthly trend
-     simultaneously. Their outputs are concatenated before the LSTM.
-
-     Args:
-         input_shape  : (lookback, n_features)
-         n_classes    : number of output classes (ETFs + CASH)
-         kernel_sizes : list of kernel sizes for each tower
-         filters      : number of Conv1D filters per tower
-         dropout      : dropout rate
-         lstm_units   : LSTM hidden size
-
-     Returns:
-         Compiled Keras model
-     """
+     input_shape, n_classes, kernel_sizes=None,
+     filters=FILTERS_EACH, dropout=0.3, lstm_units=128,
+ ):
+     from tensorflow import keras
+     from models.base import classification_head
+
      if kernel_sizes is None:
          kernel_sizes = KERNEL_SIZES
 
-     inputs = keras.Input(shape=input_shape, name="multiscale_input")
 
-     towers = []
      for k in kernel_sizes:
-         # Each tower: Conv → BN → Conv → BN → GlobalAvgPool
-         t = keras.layers.Conv1D(
-             filters, kernel_size=k, padding="causal", activation="relu",
-             name=f"conv1_k{k}"
-         )(inputs)
          t = keras.layers.BatchNormalization(name=f"bn1_k{k}")(t)
-         t = keras.layers.Conv1D(
-             filters, kernel_size=k, padding="causal", activation="relu",
-             name=f"conv2_k{k}"
-         )(t)
          t = keras.layers.BatchNormalization(name=f"bn2_k{k}")(t)
          t = keras.layers.Dropout(dropout, name=f"drop_k{k}")(t)
          towers.append(t)
 
-     # Concatenating along the feature dimension keeps the temporal axis intact for the LSTM
-     if len(towers) > 1:
-         merged = keras.layers.Concatenate(axis=-1, name="tower_concat")(towers)
-     else:
-         merged = towers[0]
-
-     # LSTM integrates multi-scale temporal features
-     x = keras.layers.LSTM(lstm_units, dropout=dropout, recurrent_dropout=0.1, name="lstm")(merged)
-
-     # Output head
      outputs = classification_head(x, n_classes, dropout)
 
-     model = keras.Model(inputs, outputs, name="Approach3_MultiScale_CNN_LSTM")
      model.compile(
-         optimizer=keras.optimizers.Adam(learning_rate=1e-3),
          loss="sparse_categorical_crossentropy",
          metrics=["accuracy"],
      )
      return model
 
 
- # ── Full train pipeline ───────────────────────────────────────────────────────
-
  def train_approach3(
-     X_train, y_train,
-     X_val, y_val,
-     n_classes: int,
-     epochs: int = 100,
-     batch_size: int = 32,
-     dropout: float = 0.3,
-     lstm_units: int = 128,
-     kernel_sizes: list = None,
  ):
-     """
-     Build and train the multi-scale CNN-LSTM.
-
-     Args:
-         X_train/val : [n, lookback, n_features]
-         y_train/val : [n] integer class labels
-         n_classes   : total output classes
-
-     Returns:
-         model   : trained Keras model
-         history : training history
-     """
      if kernel_sizes is None:
          kernel_sizes = KERNEL_SIZES
 
-     # Guard: lookback must be >= largest kernel
-     lookback = X_train.shape[1]
-     valid_kernels = [k for k in kernel_sizes if k <= lookback]
-     if not valid_kernels:
-         valid_kernels = [min(3, lookback)]
-
-     model = build_multiscale_cnn_lstm(
-         input_shape=X_train.shape[1:],
-         n_classes=n_classes,
-         kernel_sizes=valid_kernels,
-         dropout=dropout,
-         lstm_units=lstm_units,
-     )
 
      history = model.fit(
          X_train, y_train,
          validation_data=(X_val, y_val),
          epochs=epochs,
          batch_size=batch_size,
          callbacks=get_callbacks(),
          verbose=0,
      )
-
      return model, history
 
 
  def predict_approach3(model, X_test: np.ndarray) -> tuple:
-     """Predict on test set. Returns (class_preds, proba)."""
      proba = model.predict(X_test, verbose=0)
-     preds = np.argmax(proba, axis=1)
-     return preds, proba
22
 
23
+ inputs = keras.Input(shape=input_shape, name="multiscale_input")
24
+ towers = []
25
 
 
26
  for k in kernel_sizes:
27
+ t = keras.layers.Conv1D(filters, k, padding="causal", activation="relu",
28
+ name=f"conv1_k{k}")(inputs)
 
 
 
29
  t = keras.layers.BatchNormalization(name=f"bn1_k{k}")(t)
30
+ t = keras.layers.Conv1D(filters, k, padding="causal", activation="relu",
31
+ name=f"conv2_k{k}")(t)
 
 
32
  t = keras.layers.BatchNormalization(name=f"bn2_k{k}")(t)
33
  t = keras.layers.Dropout(dropout, name=f"drop_k{k}")(t)
34
  towers.append(t)
35
 
36
+ merged = keras.layers.Concatenate(axis=-1)(towers) if len(towers) > 1 else towers[0]
37
+ x = keras.layers.LSTM(lstm_units, dropout=dropout, recurrent_dropout=0.1)(merged)
 
 
 
 
 
 
 
 
38
  outputs = classification_head(x, n_classes, dropout)
39
 
40
+ model = keras.Model(inputs, outputs, name="Approach3_MultiScale")
41
  model.compile(
42
+ optimizer=keras.optimizers.Adam(1e-3),
43
  loss="sparse_categorical_crossentropy",
44
  metrics=["accuracy"],
45
  )
46
  return model
47
 
48
 
 
 
49
  def train_approach3(
50
+ X_train, y_train, X_val, y_val,
51
+ n_classes, epochs=100, batch_size=32,
52
+ dropout=0.3, lstm_units=128, kernel_sizes=None,
 
 
 
 
 
53
  ):
54
+ from models.base import get_callbacks, compute_class_weights
55
+
 
 
 
 
 
 
 
 
 
 
56
  if kernel_sizes is None:
57
  kernel_sizes = KERNEL_SIZES
58
 
59
+ lookback = X_train.shape[1]
60
+ valid_kernels = [k for k in kernel_sizes if k <= lookback] or [min(3, lookback)]
61
+ model = build_multiscale_cnn_lstm(
62
+ X_train.shape[1:], n_classes, valid_kernels, dropout=dropout, lstm_units=lstm_units,
 
 
 
 
 
 
 
 
63
  )
64
+ cw = compute_class_weights(y_train, n_classes)
65
 
66
  history = model.fit(
67
  X_train, y_train,
68
  validation_data=(X_val, y_val),
69
  epochs=epochs,
70
  batch_size=batch_size,
71
+ class_weight=cw,
72
  callbacks=get_callbacks(),
73
  verbose=0,
74
  )
 
75
  return model, history
76
 
77
 
78
  def predict_approach3(model, X_test: np.ndarray) -> tuple:
 
79
  proba = model.predict(X_test, verbose=0)
80
+ return np.argmax(proba, axis=1), proba
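As a shape sanity check on the tower merge: causal Conv1D preserves the time axis, so the three towers differ only in receptive field and stack cleanly along features. A minimal standalone sketch with hypothetical shapes (lookback 30, 12 features):

    import numpy as np
    from tensorflow import keras

    lookback, n_features, filters = 30, 12, 32   # hypothetical shapes
    inp = keras.Input(shape=(lookback, n_features))
    towers = [keras.layers.Conv1D(filters, k, padding="causal")(inp) for k in (3, 7, 21)]
    merged = keras.layers.Concatenate(axis=-1)(towers)
    m = keras.Model(inp, merged)
    out = m(np.zeros((1, lookback, n_features), dtype="float32"))
    print(out.shape)  # (1, 30, 96): 32 filters × 3 towers, time axis intact for the LSTM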
 
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/ui/components.py CHANGED
@@ -227,3 +227,46 @@ def show_audit_trail(audit_trail: list):
          {"selector": "td", "props": [("padding", "10px")]},
      ])
      st.dataframe(styled, use_container_width=True, height=500)
+
+
+ # ── All models' next-day signals panel ────────────────────────────────────────
+
+ def show_all_signals_panel(all_signals: dict, target_etfs: list, include_cash: bool, next_date):
+     """
+     Compact panel showing what each model predicts for the next trading day,
+     with the top probability displayed.
+     """
+     APPROACH_COLORS = {
+         "Approach 1": "#00ffc8",
+         "Approach 2": "#7c6aff",
+         "Approach 3": "#ff6b6b",
+     }
+
+     st.subheader(f"🗓️ All Models — {next_date.strftime('%Y-%m-%d')} Signals")
+
+     cols = st.columns(len(all_signals))
+     for col, (name, info) in zip(cols, all_signals.items()):
+         color = APPROACH_COLORS.get(name, "#888888")
+         signal = info["signal"]
+         proba = info["proba"]
+         top_prob = float(np.max(proba)) * 100
+         is_winner = info["is_winner"]
+         border = f"3px solid {color}"
+         badge = " ⭐ WINNER" if is_winner else ""
+
+         col.markdown(f"""
+         <div style="border:{border}; border-radius:12px; padding:18px 16px;
+                     background:#111118; text-align:center;">
+           <div style="color:{color}; font-size:11px; font-weight:700;
+                       letter-spacing:2px; margin-bottom:6px;">
+             {name.upper()}{badge}
+           </div>
+           <div style="color:white; font-size:28px; font-weight:800;
+                       margin:8px 0;">
+             {signal}
+           </div>
+           <div style="color:#aaa; font-size:12px;">
+             Top prob: <span style="color:{color}; font-weight:700;">{top_prob:.1f}%</span>
+           </div>
+         </div>
+         """, unsafe_allow_html=True)
hf_space/hf_space/hf_space/hf_space/hf_space/strategy/backtest.py CHANGED
@@ -55,6 +55,8 @@ def execute_strategy(
          cls = min(cls, n_etfs - 1)
          signal_etf = target_etfs[cls].replace("_Ret", "")
          realized_ret = float(y_raw_test[i][cls])
+         # Sanity clip: daily returns should never exceed ±50%
+         realized_ret = max(-0.50, min(0.50, realized_ret))

          net_ret = realized_ret - (fee_bps / 10000)
          strat_rets.append(net_ret)
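A toy check of the per-day arithmetic above (numbers are illustrative):

    fee_bps = 5                                    # 5 basis points per trade
    fee = fee_bps / 10000                          # 0.0005, bps to decimal
    realized_ret = 0.82                            # corrupt data point, e.g. a bad split adjustment
    clipped = max(-0.50, min(0.50, realized_ret))  # 0.50, the ±50% sanity bound
    net_ret = clipped - fee                        # 0.4995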
hf_space/hf_space/hf_space/ui/components.py CHANGED
@@ -1,43 +1,35 @@
  """
  ui/components.py
- Reusable Streamlit UI blocks:
- - Freshness warning banner
- - Next trading day signal banner
- - Signal conviction panel
- - Metrics row
- - Audit trail table
- - Comparison summary table
+ Reusable Streamlit UI blocks.
+ - Fixed applymap → map deprecation
+ - Removed debug expanders
+ - Added show_all_signals_panel
  """

  import streamlit as st
  import pandas as pd
  import numpy as np
-
  from signals.conviction import conviction_color, conviction_icon


- # ── Freshness warning ─────────────────────────────────────────────────────────
+ # ── Freshness status ──────────────────────────────────────────────────────────

  def show_freshness_status(freshness: dict):
-     """Display data freshness status. Stops app if data is stale."""
      if freshness.get("fresh"):
          st.success(freshness["message"])
      else:
          st.warning(freshness["message"])


- # ── Next trading day banner ───────────────────────────────────────────────────
+ # ── Winner signal banner ──────────────────────────────────────────────────────

  def show_signal_banner(next_signal: str, next_date, approach_name: str):
-     """Large coloured banner showing the winning approach's next signal."""
      is_cash = next_signal == "CASH"
-     bg = "linear-gradient(135deg, #2d3436 0%, #1a1a2e 100%)" if is_cash else \
-          "linear-gradient(135deg, #00d1b2 0%, #00a896 100%)"
-
+     bg = ("linear-gradient(135deg, #2d3436 0%, #1a1a2e 100%)" if is_cash
+           else "linear-gradient(135deg, #00d1b2 0%, #00a896 100%)")
      st.markdown(f"""
      <div style="background:{bg}; padding:25px; border-radius:15px;
-                 text-align:center; box-shadow:0 8px 16px rgba(0,0,0,0.3);
-                 margin:16px 0;">
+                 text-align:center; box-shadow:0 8px 16px rgba(0,0,0,0.3); margin:16px 0;">
        <div style="color:rgba(255,255,255,0.7); font-size:12px;
                    letter-spacing:3px; margin-bottom:6px;">
          {approach_name.upper()} · NEXT TRADING DAY SIGNAL
@@ -50,78 +42,98 @@ def show_signal_banner(next_signal: str, next_date, approach_name: str):
      """, unsafe_allow_html=True)


+ # ── All models signals panel ──────────────────────────────────────────────────
+
+ def show_all_signals_panel(all_signals: dict, target_etfs: list,
+                            include_cash: bool, next_date, optimal_lookback: int):
+     APPROACH_COLORS = {
+         "Approach 1": "#00ffc8",
+         "Approach 2": "#7c6aff",
+         "Approach 3": "#ff6b6b",
+     }
+
+     st.subheader(f"🗓️ All Models — {next_date.strftime('%Y-%m-%d')} Signals")
+     st.caption(f"📐 Optimal lookback: **{optimal_lookback}d** (auto-selected from 30/45/60)")
+
+     cols = st.columns(len(all_signals))
+     for col, (name, info) in zip(cols, all_signals.items()):
+         color = APPROACH_COLORS.get(name, "#888888")
+         signal = info["signal"]
+         proba = info["proba"]
+         top_prob = float(np.max(proba)) * 100
+         is_winner = info["is_winner"]
+         badge = " ⭐" if is_winner else ""
+
+         col.markdown(f"""
+         <div style="border:2px solid {color}; border-radius:12px; padding:18px 16px;
+                     background:#111118; text-align:center; margin-bottom:8px;">
+           <div style="color:{color}; font-size:10px; font-weight:700;
+                       letter-spacing:2px; margin-bottom:6px;">
+             {name.upper()}{badge}
+           </div>
+           <div style="color:white; font-size:30px; font-weight:800; margin:8px 0;">
+             {signal}
+           </div>
+           <div style="color:#aaa; font-size:12px;">
+             Confidence: <span style="color:{color}; font-weight:700;">{top_prob:.1f}%</span>
+           </div>
+         </div>
+         """, unsafe_allow_html=True)
+
+
  # ── Signal conviction panel ───────────────────────────────────────────────────

  def show_conviction_panel(conviction: dict):
-     """
-     White-background conviction panel with Z-score gauge and per-ETF bars.
-     Uses separate st.markdown calls per ETF row to avoid Streamlit HTML escaping.
-     """
      label = conviction["label"]
      z_score = conviction["z_score"]
      best_name = conviction["best_name"]
      sorted_pairs = conviction["sorted_pairs"]
-
      color = conviction_color(label)
      icon = conviction_icon(label)

      z_clipped = max(-3.0, min(3.0, z_score))
      bar_pct = int((z_clipped + 3) / 6 * 100)
-
-     max_score = max(s for _, s in sorted_pairs) if sorted_pairs else 1.0
+     max_score = max((s for _, s in sorted_pairs), default=1.0)
      if max_score <= 0:
          max_score = 1.0

-     # ── Header + gauge ────────────────────────────────────────────────────────
      st.markdown(f"""
      <div style="background:#ffffff; border:1px solid #ddd;
                  border-left:5px solid {color}; border-radius:12px 12px 0 0;
                  padding:18px 24px 12px 24px; margin:12px 0 0 0;
                  box-shadow:0 2px 8px rgba(0,0,0,0.07);">
-
-       <div style="display:flex; align-items:center; gap:12px;
-                   margin-bottom:14px; flex-wrap:wrap;">
+       <div style="display:flex; align-items:center; gap:12px; margin-bottom:14px; flex-wrap:wrap;">
          <span style="font-size:20px;">{icon}</span>
          <span style="font-size:18px; font-weight:700; color:#1a1a1a;">Signal Conviction</span>
-         <span style="background:#f0f0f0; border:1px solid {color};
-                      color:{color}; font-weight:700; font-size:14px;
-                      padding:3px 12px; border-radius:8px;">
+         <span style="background:#f0f0f0; border:1px solid {color}; color:{color};
+                      font-weight:700; font-size:14px; padding:3px 12px; border-radius:8px;">
            Z = {z_score:.2f} &sigma;
          </span>
          <span style="margin-left:auto; background:{color}; color:#fff;
-                      font-weight:700; padding:4px 16px;
-                      border-radius:20px; font-size:13px;">
+                      font-weight:700; padding:4px 16px; border-radius:20px; font-size:13px;">
            {label}
          </span>
        </div>
-
        <div style="display:flex; justify-content:space-between;
                    font-size:11px; color:#999; margin-bottom:4px;">
-         <span>Weak &minus;3&sigma;</span>
-         <span>Neutral 0&sigma;</span>
-         <span>Strong +3&sigma;</span>
+         <span>Weak &minus;3&sigma;</span><span>Neutral 0&sigma;</span><span>Strong +3&sigma;</span>
        </div>
-       <div style="background:#f0f0f0; border-radius:8px; height:14px;
-                   overflow:hidden; position:relative; border:1px solid #e0e0e0;
-                   margin-bottom:14px;">
-         <div style="position:absolute; left:50%; top:0; width:2px;
-                     height:100%; background:#ccc;"></div>
+       <div style="background:#f0f0f0; border-radius:8px; height:14px; overflow:hidden;
+                   position:relative; border:1px solid #e0e0e0; margin-bottom:14px;">
+         <div style="position:absolute; left:50%; top:0; width:2px; height:100%; background:#ccc;"></div>
          <div style="width:{bar_pct}%; height:100%;
-                     background:linear-gradient(90deg,#fab1a0,{color});
-                     border-radius:8px;"></div>
+                     background:linear-gradient(90deg,#fab1a0,{color}); border-radius:8px;"></div>
        </div>
-
        <div style="font-size:12px; color:#999; margin-bottom:2px;">
          Model probability by ETF (ranked high &rarr; low):
        </div>
      </div>
      """, unsafe_allow_html=True)

-     # ── Per-ETF rows ──────────────────────────────────────────────────────────
      for i, (name, score) in enumerate(sorted_pairs):
          is_winner = (name == best_name)
          is_last = (i == len(sorted_pairs) - 1)
          bar_w = int(score / max_score * 100)
          name_style = "font-weight:700; color:#00897b;" if is_winner else "color:#444;"
          bar_color = color if is_winner else "#b2dfdb" if score > max_score * 0.5 else "#e0e0e0"
          star = " ★" if is_winner else ""
@@ -134,10 +146,9 @@ def show_conviction_panel(conviction: dict):
                      box-shadow:0 2px 8px rgba(0,0,0,0.07);">
            <div style="display:flex; align-items:center; gap:12px;">
              <span style="width:44px; text-align:right; font-size:13px; {name_style}">{name}{star}</span>
-             <div style="flex:1; background:#f5f5f5; border-radius:4px;
-                         height:14px; overflow:hidden; border:1px solid #e8e8e8;">
-               <div style="width:{bar_w}%; height:100%;
-                           background:{bar_color}; border-radius:4px;"></div>
+             <div style="flex:1; background:#f5f5f5; border-radius:4px; height:14px;
+                         overflow:hidden; border:1px solid #e8e8e8;">
+               <div style="width:{bar_w}%; height:100%; background:{bar_color}; border-radius:4px;"></div>
              </div>
              <span style="width:56px; font-size:12px; color:#888; text-align:right;">{score:.4f}</span>
            </div>
@@ -145,7 +156,7 @@ def show_conviction_panel(conviction: dict):
      """, unsafe_allow_html=True)

      st.caption(
-         "Z-score = std deviations the top ETF's probability sits above the mean of all ETF probabilities. "
+         "Z-score = std deviations the top ETF's probability sits above the mean. "
          "Higher → model is more decisive."
      )

@@ -153,60 +164,44 @@ def show_conviction_panel(conviction: dict):
  # ── Metrics row ───────────────────────────────────────────────────────────────

  def show_metrics_row(result: dict, tbill_rate: float):
-     """Five-column metric display."""
      col1, col2, col3, col4, col5 = st.columns(5)
-
-     col1.metric(
-         "📈 Annualised Return",
-         f"{result['ann_return']*100:.2f}%",
-         delta=f"vs T-bill: {(result['ann_return'] - tbill_rate)*100:.2f}%",
-     )
-     col2.metric(
-         "📊 Sharpe Ratio",
-         f"{result['sharpe']:.2f}",
-         delta="Risk-Adjusted" if result['sharpe'] > 1 else "Below Threshold",
-     )
-     col3.metric(
-         "🎯 Hit Ratio (15d)",
-         f"{result['hit_ratio']*100:.0f}%",
-         delta="Strong" if result['hit_ratio'] > 0.6 else "Weak",
-     )
-     col4.metric(
-         "📉 Max Drawdown",
-         f"{result['max_dd']*100:.2f}%",
-         delta="Peak to Trough",
-     )
-     col5.metric(
-         "⚠️ Max Daily DD",
-         f"{result['max_daily_dd']*100:.2f}%",
-         delta="Worst Day",
-     )
+     col1.metric("📈 Ann. Return", f"{result['ann_return']*100:.2f}%",
+                 delta=f"vs T-bill: {(result['ann_return'] - tbill_rate)*100:.2f}%")
+     col2.metric("📊 Sharpe", f"{result['sharpe']:.2f}",
+                 delta="Strong" if result['sharpe'] > 1 else "Weak")
+     col3.metric("🎯 Hit Ratio 15d", f"{result['hit_ratio']*100:.0f}%",
+                 delta="Good" if result['hit_ratio'] > 0.55 else "Weak")
+     col4.metric("📉 Max Drawdown", f"{result['max_dd']*100:.2f}%",
+                 delta="Peak to Trough")
+     col5.metric("⚠️ Max Daily DD", f"{result['max_daily_dd']*100:.2f}%",
+                 delta="Worst Day")


  # ── Comparison table ──────────────────────────────────────────────────────────

  def show_comparison_table(comparison_df: pd.DataFrame):
-     """Styled comparison table for all three approaches."""
      def highlight_winner(row):
          if "WINNER" in str(row.get("Winner", "")):
              return ["background-color: rgba(0,200,150,0.15); font-weight:bold"] * len(row)
          return [""] * len(row)

-     styled = comparison_df.style.apply(highlight_winner, axis=1).set_properties(**{
-         "text-align": "center",
-         "font-size": "14px",
-     }).set_table_styles([
-         {"selector": "th", "props": [("font-size", "14px"), ("font-weight", "bold"),
-                                      ("text-align", "center")]},
-         {"selector": "td", "props": [("padding", "10px")]},
-     ])
+     styled = (
+         comparison_df.style
+         .apply(highlight_winner, axis=1)
+         .set_properties(**{"text-align": "center", "font-size": "14px"})
+         .set_table_styles([
+             {"selector": "th", "props": [("font-size", "14px"),
+                                          ("font-weight", "bold"),
+                                          ("text-align", "center")]},
+             {"selector": "td", "props": [("padding", "10px")]},
+         ])
+     )
      st.dataframe(styled, use_container_width=True)


  # ── Audit trail ───────────────────────────────────────────────────────────────

  def show_audit_trail(audit_trail: list):
-     """Last 20 days styled audit trail."""
      if not audit_trail:
          st.info("No audit trail data available.")
          return
@@ -214,59 +209,19 @@ def show_audit_trail(audit_trail: list):
      df = pd.DataFrame(audit_trail).tail(20)[["Date", "Signal", "Net_Return"]]

      def color_return(val):
-         return "color: #00c896; font-weight:bold" if val > 0 else "color: #ff4b4b; font-weight:bold"
-
-     styled = df.style.applymap(color_return, subset=["Net_Return"]).format(
-         {"Net_Return": "{:.2%}"}
-     ).set_properties(**{
-         "font-size": "16px",
-         "text-align": "center",
-     }).set_table_styles([
-         {"selector": "th", "props": [("font-size", "16px"), ("font-weight", "bold"),
-                                      ("text-align", "center")]},
-         {"selector": "td", "props": [("padding", "10px")]},
-     ])
+         return ("color: #00c896; font-weight:bold" if val > 0
+                 else "color: #ff4b4b; font-weight:bold")
+
+     styled = (
+         df.style
+         .map(color_return, subset=["Net_Return"])
+         .format({"Net_Return": "{:.2%}"})
+         .set_properties(**{"font-size": "14px", "text-align": "center"})
+         .set_table_styles([
+             {"selector": "th", "props": [("font-size", "14px"),
+                                          ("font-weight", "bold"),
+                                          ("text-align", "center")]},
+             {"selector": "td", "props": [("padding", "10px")]},
+         ])
+     )
      st.dataframe(styled, use_container_width=True, height=500)
-
-
- # ── All models' next day signals panel ────────────────────────────────────────
-
- def show_all_signals_panel(all_signals: dict, target_etfs: list, include_cash: bool, next_date):
-     """
-     Compact panel showing what each model predicts for next trading day,
-     with top probability displayed.
-     """
-     APPROACH_COLORS = {
-         "Approach 1": "#00ffc8",
-         "Approach 2": "#7c6aff",
-         "Approach 3": "#ff6b6b",
-     }
-
-     st.subheader(f"🗓️ All Models — {next_date.strftime('%Y-%m-%d')} Signals")
-
-     cols = st.columns(len(all_signals))
-     for col, (name, info) in zip(cols, all_signals.items()):
-         color = APPROACH_COLORS.get(name, "#888888")
-         signal = info["signal"]
-         proba = info["proba"]
-         top_prob = float(np.max(proba)) * 100
-         is_winner = info["is_winner"]
-         border = f"3px solid {color}"
-         badge = " ⭐ WINNER" if is_winner else ""
-
-         col.markdown(f"""
-         <div style="border:{border}; border-radius:12px; padding:18px 16px;
-                     background:#111118; text-align:center;">
-           <div style="color:{color}; font-size:11px; font-weight:700;
-                       letter-spacing:2px; margin-bottom:6px;">
-             {name.upper()}{badge}
-           </div>
-           <div style="color:white; font-size:28px; font-weight:800;
-                       margin:8px 0;">
-             {signal}
-           </div>
-           <div style="color:#aaa; font-size:12px;">
-             Top prob: <span style="color:{color}; font-weight:700;">{top_prob:.1f}%</span>
-           </div>
-         </div>
-         """, unsafe_allow_html=True)
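The applymap → map change tracks pandas itself: Styler.applymap was deprecated in pandas 2.1 in favour of the elementwise Styler.map with identical semantics. A minimal standalone check:

    import pandas as pd

    df = pd.DataFrame({"Net_Return": [0.012, -0.004]})
    styled = (
        df.style
        .map(lambda v: "color: #00c896" if v > 0 else "color: #ff4b4b",
             subset=["Net_Return"])            # per-cell styling, formerly applymap
        .format({"Net_Return": "{:.2%}"})
    )
    html = styled.to_html()                    # renders one colour rule per cell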
hf_space/hf_space/ui/charts.py CHANGED
@@ -1,22 +1,15 @@
  """
  ui/charts.py
- All Plotly chart builders for the Streamlit UI.
+ Plotly chart builders.
+ Equity curve: winner + SPY + AGG only. Y-axis as % growth (not raw multiplier).
  """

  import numpy as np
  import pandas as pd
  import plotly.graph_objects as go

-
- APPROACH_COLOURS = {
-     "Approach 1": "#00ffc8",
-     "Approach 2": "#7c6aff",
-     "Approach 3": "#ff6b6b",
- }
- BENCHMARK_COLOURS = {
-     "SPY": "#ff4b4b",
-     "AGG": "#ffa500",
- }
+ WINNER_COLOUR = "#00ffc8"
+ BENCHMARK_COLOURS = {"SPY": "#ff4b4b", "AGG": "#ffa500"}


  def equity_curve_chart(
@@ -28,117 +21,72 @@ def equity_curve_chart(
      tbill_rate: float,
  ) -> go.Figure:
      """
-     Equity curve chart showing all three approaches + SPY + AGG benchmarks.
-
-     Args:
-         results     : {approach_name: result_dict}
-         winner_name : highlighted approach
-         plot_dates  : DatetimeIndex for x-axis
-         df          : full DataFrame (for benchmark columns)
-         test_slice  : slice object to extract test-period benchmark returns
-         tbill_rate  : for benchmark metric calculation
+     Equity curve: winner strategy vs SPY and AGG.
+     Y-axis shows % growth (cum_return - 1) * 100 for readability.
      """
      from strategy.backtest import compute_benchmark_metrics

      fig = go.Figure()

-     # ── Strategy lines ────────────────────────────────────────────────────────
-     for name, res in results.items():
-         if res is None:
-             continue
-         colour = APPROACH_COLOURS.get(name, "#aaaaaa")
-         width = 3 if name == winner_name else 1.5
-         dash = "solid" if name == winner_name else "dot"
-
-         n = min(len(res["cum_returns"]), len(plot_dates))
-
+     # ── Winner strategy ───────────────────────────────────────────────────────
+     winner_res = results.get(winner_name)
+     if winner_res is not None:
+         cum = winner_res["cum_returns"]
+         n = min(len(cum), len(plot_dates))
          fig.add_trace(go.Scatter(
              x=plot_dates[:n],
-             y=res["cum_returns"][:n],
+             y=(cum[:n] - 1) * 100,
              mode="lines",
-             name=f"{name} {'★' if name == winner_name else ''}",
-             line=dict(color=colour, width=width, dash=dash),
-             fill="tozeroy" if name == winner_name else None,
-             fillcolor=f"rgba({_hex_to_rgb(colour)},0.07)" if name == winner_name else None,
+             name=f"{winner_name} ★",
+             line=dict(color=WINNER_COLOUR, width=2.5),
+             fill="tozeroy",
+             fillcolor="rgba(0,255,200,0.07)",
          ))

-     # ── Benchmark: SPY ────────────────────────────────────────────────────────
+     # ── SPY benchmark ─────────────────────────────────────────────────────────
      if "SPY_Ret" in df.columns:
-         spy_rets = df["SPY_Ret"].iloc[test_slice].values
+         spy_rets = df["SPY_Ret"].iloc[test_slice].values.copy()
+         spy_rets = np.clip(spy_rets, -0.5, 0.5)  # sanity clip
+         spy_rets = spy_rets[~np.isnan(spy_rets)]
          n = min(len(spy_rets), len(plot_dates))
          spy_m = compute_benchmark_metrics(spy_rets[:n], tbill_rate)
          fig.add_trace(go.Scatter(
              x=plot_dates[:n],
-             y=spy_m["cum_returns"],
+             y=(spy_m["cum_returns"] - 1) * 100,
              mode="lines",
-             name="SPY (Equity BM)",
+             name="SPY",
              line=dict(color=BENCHMARK_COLOURS["SPY"], width=1.5, dash="dot"),
          ))

-     # ── Benchmark: AGG ────────────────────────────────────────────────────────
+     # ── AGG benchmark ─────────────────────────────────────────────────────────
      if "AGG_Ret" in df.columns:
-         agg_rets = df["AGG_Ret"].iloc[test_slice].values
+         agg_rets = df["AGG_Ret"].iloc[test_slice].values.copy()
+         agg_rets = np.clip(agg_rets, -0.5, 0.5)
+         agg_rets = agg_rets[~np.isnan(agg_rets)]
          n = min(len(agg_rets), len(plot_dates))
          agg_m = compute_benchmark_metrics(agg_rets[:n], tbill_rate)
          fig.add_trace(go.Scatter(
              x=plot_dates[:n],
-             y=agg_m["cum_returns"],
+             y=(agg_m["cum_returns"] - 1) * 100,
              mode="lines",
-             name="AGG (Bond BM)",
+             name="AGG",
              line=dict(color=BENCHMARK_COLOURS["AGG"], width=1.5, dash="dot"),
          ))

      fig.update_layout(
          template="plotly_dark",
-         height=460,
+         height=420,
          hovermode="x unified",
-         showlegend=True,
          legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01, font=dict(size=11)),
          xaxis_title="Date",
-         yaxis_title="Cumulative Return (×)",
+         yaxis_title="Cumulative Return (%)",
          margin=dict(l=50, r=30, t=20, b=50),
+         yaxis=dict(ticksuffix="%"),
      )
      return fig


- def comparison_bar_chart(results: dict, winner_name: str) -> go.Figure:
-     """
-     Horizontal bar chart comparing annualised returns across all three approaches.
-     """
-     names = []
-     returns = []
-     colours = []
-
-     for name, res in results.items():
-         if res is None:
-             continue
-         names.append(name)
-         returns.append(res["ann_return"] * 100)
-         colours.append(APPROACH_COLOURS.get(name, "#aaaaaa"))
-
-     fig = go.Figure(go.Bar(
-         x=returns,
-         y=names,
-         orientation="h",
-         marker_color=colours,
-         text=[f"{r:.1f}%" for r in returns],
-         textposition="auto",
-     ))
-
-     fig.update_layout(
-         template="plotly_dark",
-         height=200,
-         xaxis_title="Annualised Return (%)",
-         margin=dict(l=100, r=30, t=10, b=40),
-         showlegend=False,
-     )
-     return fig
-
-
- # ── Helper ────────────────────────────────────────────────────────────────────
-
  def _hex_to_rgb(hex_color: str) -> str:
-     """Convert #rrggbb to 'r,g,b' string for rgba()."""
      h = hex_color.lstrip("#")
      r, g, b = int(h[0:2], 16), int(h[2:4], 16), int(h[4:6], 16)
      return f"{r},{g},{b}"
hf_space/models/base.py CHANGED
@@ -1,28 +1,53 @@
  """
  models/base.py
- Shared utilities for all three CNN-LSTM variants.
- Key fix: class_weight support to prevent majority-class collapse.
+ Shared utilities for all CNN-LSTM variants.
+ Optimised for CPU training on HF Spaces.
  """

  import numpy as np
- import pandas as pd
+ import hashlib
+ import pickle
+ import os
+ from pathlib import Path
  from sklearn.preprocessing import RobustScaler
  from sklearn.utils.class_weight import compute_class_weight
- import tensorflow as tf
- from tensorflow import keras

- SEED = 42
- tf.random.set_seed(SEED)
+ SEED = 42
+ CACHE_DIR = Path("/tmp/p2_model_cache")
+ CACHE_DIR.mkdir(exist_ok=True)
+
  np.random.seed(SEED)


+ # ── Cache helpers ─────────────────────────────────────────────────────────────
+
+ def make_cache_key(last_date: str, start_yr: int, fee_bps: int,
+                    epochs: int, split: str, include_cash: bool,
+                    lookback: int) -> str:
+     raw = f"{last_date}_{start_yr}_{fee_bps}_{epochs}_{split}_{include_cash}_{lookback}"
+     return hashlib.md5(raw.encode()).hexdigest()
+
+
+ def save_cache(key: str, payload: dict):
+     path = CACHE_DIR / f"{key}.pkl"
+     with open(path, "wb") as f:
+         pickle.dump(payload, f)
+
+
+ def load_cache(key: str) -> dict | None:
+     path = CACHE_DIR / f"{key}.pkl"
+     if path.exists():
+         try:
+             with open(path, "rb") as f:
+                 return pickle.load(f)
+         except Exception:
+             path.unlink(missing_ok=True)
+     return None
+
+
  # ── Sequence builder ──────────────────────────────────────────────────────────

  def build_sequences(features: np.ndarray, targets: np.ndarray, lookback: int):
-     """
-     Build supervised sequences for CNN-LSTM input.
-     X[i] = features[i : i+lookback] → predicts y[i+lookback]
-     """
      X, y = [], []
      for i in range(lookback, len(features)):
          X.append(features[i - lookback: i])
@@ -36,35 +61,25 @@ def train_val_test_split(X, y, train_pct=0.70, val_pct=0.15):
      n = len(X)
      t1 = int(n * train_pct)
      t2 = int(n * (train_pct + val_pct))
-     return (
-         X[:t1], y[:t1],
-         X[t1:t2], y[t1:t2],
-         X[t2:], y[t2:],
-     )
+     return X[:t1], y[:t1], X[t1:t2], y[t1:t2], X[t2:], y[t2:]


  # ── Feature scaling ───────────────────────────────────────────────────────────

  def scale_features(X_train, X_val, X_test):
      n_feat = X_train.shape[2]
      scaler = RobustScaler()
      scaler.fit(X_train.reshape(-1, n_feat))
-
      def _t(X):
          s = X.shape
          return scaler.transform(X.reshape(-1, n_feat)).reshape(s)
-
      return _t(X_train), _t(X_val), _t(X_test), scaler


  # ── Label builder ─────────────────────────────────────────────────────────────

  def returns_to_labels(y_raw, include_cash=True, cash_threshold=0.0):
-     """
-     Assign label = argmax(returns).
-     If include_cash and best return < cash_threshold → label = n_etfs (CASH).
-     """
      best = np.argmax(y_raw, axis=1)
      if include_cash:
          best_ret = y_raw[np.arange(len(y_raw)), best]
          cash_idx = y_raw.shape[1]
@@ -77,35 +92,22 @@ def returns_to_labels(y_raw, include_cash=True, cash_threshold=0.0):
  # ── Class weights ─────────────────────────────────────────────────────────────

  def compute_class_weights(y_labels: np.ndarray, n_classes: int) -> dict:
-     """
-     Compute balanced class weights to counteract majority-class collapse.
-     Returns dict {class_index: weight} for use in model.fit().
-     """
-     classes = np.arange(n_classes)
      present = np.unique(y_labels)
-
      try:
-         weights = compute_class_weight(
-             class_weight="balanced",
-             classes=present,
-             y=y_labels,
-         )
+         weights = compute_class_weight("balanced", classes=present, y=y_labels)
          weight_dict = {int(c): float(w) for c, w in zip(present, weights)}
      except Exception:
          weight_dict = {}
-
-     # Fill any missing classes with weight 1.0
-     for c in classes:
+     for c in range(n_classes):
          if c not in weight_dict:
              weight_dict[c] = 1.0
-
      return weight_dict


  # ── Callbacks ─────────────────────────────────────────────────────────────────

- def get_callbacks(patience_es=20, patience_lr=10, min_lr=1e-6):
-     """Longer patience to allow models time to learn past majority class."""
+ def get_callbacks(patience_es=15, patience_lr=8, min_lr=1e-6):
+     from tensorflow import keras
      return [
          keras.callbacks.EarlyStopping(
              monitor="val_loss",
@@ -123,51 +125,76 @@ def get_callbacks(patience_es=15, patience_lr=8, min_lr=1e-6):
      ]


- # ── Output head ───────────────────────────────────────────────────────────────
+ # ── Lightweight output head (CPU-optimised) ───────────────────────────────────

  def classification_head(x, n_classes: int, dropout: float = 0.3):
+     """Smaller head than original — faster on CPU, less overfitting risk."""
+     from tensorflow import keras
-     x = keras.layers.Dense(64, activation="relu")(x)
-     x = keras.layers.BatchNormalization()(x)
-     x = keras.layers.Dropout(dropout)(x)
      x = keras.layers.Dense(32, activation="relu")(x)
-     x = keras.layers.Dropout(dropout / 2)(x)
+     x = keras.layers.Dropout(dropout)(x)
      x = keras.layers.Dense(n_classes, activation="softmax")(x)
      return x


- # ── Prediction ────────────────────────────────────────────────────────────────
-
- def predict_classes(model, X_test: np.ndarray) -> tuple:
-     proba = model.predict(X_test, verbose=0)
-     return np.argmax(proba, axis=1), proba
-
-
- # ── Metrics ───────────────────────────────────────────────────────────────────
-
- def evaluate_returns(
-     preds, proba, y_raw_test, target_etfs, tbill_rate, fee_bps, include_cash=True,
- ):
-     n_etfs = len(target_etfs)
-     daily_tbill = tbill_rate / 252
-     strat_rets = []
-
-     for i, cls in enumerate(preds):
-         if include_cash and cls == n_etfs:
-             net = daily_tbill - fee_bps / 10000
-         else:
-             cls = min(int(cls), n_etfs - 1)
-             net = float(y_raw_test[i][cls]) - fee_bps / 10000
-         strat_rets.append(net)
-
-     strat_rets = np.array(strat_rets)
-     cum_returns = np.cumprod(1 + strat_rets)
-     ann_return = cum_returns[-1] ** (252 / len(strat_rets)) - 1
-
-     last_proba = proba[-1]
-     next_cls = int(np.argmax(last_proba))
-     next_etf = (
-         "CASH" if (include_cash and next_cls == n_etfs)
-         else target_etfs[min(next_cls, n_etfs - 1)].replace("_Ret", "")
-     )
-
-     return strat_rets, ann_return, cum_returns, last_proba, next_etf
+ # ── Auto lookback selection ───────────────────────────────────────────────────
+
+ def find_best_lookback(
+     X_raw: np.ndarray,
+     y_raw: np.ndarray,
+     y_labels_fn,
+     train_pct: float,
+     val_pct: float,
+     n_classes: int,
+     include_cash: bool,
+     candidates: list = None,
+ ):
+     """
+     Train a fast lightweight CNN on each lookback candidate using val loss.
+     Returns best lookback int.
+     Uses only Approach 1 architecture (fastest) to pick the winner.
+     """
+     from tensorflow import keras
+
+     if candidates is None:
+         candidates = [30, 45, 60]
+
+     best_lb = candidates[0]
+     best_loss = np.inf
+
+     for lb in candidates:
+         try:
+             X_seq, y_seq = build_sequences(X_raw, y_raw, lb)
+             y_lab = y_labels_fn(y_seq)
+
+             X_tr, y_tr, X_v, y_v, _, _ = train_val_test_split(X_seq, y_lab, train_pct, val_pct)
+             X_tr_s, X_v_s, _, _ = scale_features(X_tr, X_v, X_v)
+
+             cw = compute_class_weights(y_tr, n_classes)
+
+             # Tiny fast model just for lookback selection
+             inp = keras.Input(shape=X_tr_s.shape[1:])
+             x = keras.layers.Conv1D(16, min(3, lb), padding="causal", activation="relu")(inp)
+             x = keras.layers.GlobalAveragePooling1D()(x)
+             out = keras.layers.Dense(n_classes, activation="softmax")(x)
+             m = keras.Model(inp, out)
+             m.compile(optimizer="adam", loss="sparse_categorical_crossentropy")
+
+             hist = m.fit(
+                 X_tr_s, y_tr,
+                 validation_data=(X_v_s, y_v),
+                 epochs=15,
+                 batch_size=64,
+                 class_weight=cw,
+                 callbacks=[keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True)],
+                 verbose=0,
+             )
+             val_loss = min(hist.history.get("val_loss", [np.inf]))
+             if val_loss < best_loss:
+                 best_loss = val_loss
+                 best_lb = lb
+
+             del m
+         except Exception:
+             continue
+
+     return best_lb
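A quick round-trip of the cache helpers above (the payload contents and settings are hypothetical; keys are an MD5 over the run settings, so identical settings hit the cache and any changed parameter misses):

    from models.base import make_cache_key, save_cache, load_cache

    key = make_cache_key(last_date="2025-01-02", start_yr=2010, fee_bps=5,
                         epochs=80, split="70/15/15", include_cash=True, lookback=45)
    save_cache(key, {"winner": "Approach 1", "sharpe": 1.23})   # hypothetical payload
    assert load_cache(key) == {"winner": "Approach 1", "sharpe": 1.23}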
models/approach1_wavelet.py CHANGED
@@ -1,14 +1,21 @@
  """
  models/approach1_wavelet.py
  Approach 1: Wavelet Decomposition CNN-LSTM
- With class weights to prevent majority-class collapse.
+ - Dynamic wavelet level based on sequence length (no boundary warnings)
+ - CPU-optimised smaller architecture
+ - Class weights to prevent majority-class collapse
  """

  import numpy as np
  import pywt

  WAVELET = "db4"
- LEVEL = 3
+
+
+ def _safe_wavelet_level(lookback: int, wavelet: str = WAVELET) -> int:
+     """Compute max safe wavelet level for the given sequence length."""
+     max_level = pywt.dwt_max_level(lookback, wavelet)
+     return min(2, max_level)  # cap at 2 to avoid boundary effects


  def _wavelet_decompose_signal(signal: np.ndarray, wavelet: str, level: int) -> np.ndarray:
@@ -16,13 +23,14 @@ def _wavelet_decompose_signal(signal: np.ndarray, wavelet: str, level: int) -> n
      coeffs = pywt.wavedec(signal, wavelet, level=level)
      bands = []
      for c in coeffs:
-         band = np.interp(np.linspace(0, len(c)-1, T), np.arange(len(c)), c)
+         band = np.interp(np.linspace(0, len(c) - 1, T), np.arange(len(c)), c)
          bands.append(band)
      return np.stack(bands, axis=-1)


- def apply_wavelet_transform(X: np.ndarray, wavelet=WAVELET, level=LEVEL) -> np.ndarray:
+ def apply_wavelet_transform(X: np.ndarray, wavelet: str = WAVELET) -> np.ndarray:
      n_samples, lookback, n_features = X.shape
+     level = _safe_wavelet_level(lookback, wavelet)
      n_bands = level + 1
      X_wt = np.zeros((n_samples, lookback, n_features * n_bands), dtype=np.float32)
      for s in range(n_samples):
@@ -33,18 +41,18 @@ def apply_wavelet_transform(X: np.ndarray, wavelet: str = WAVELET) -> np.ndarray
      return X_wt


- def build_wavelet_cnn_lstm(input_shape, n_classes, dropout=0.3, lstm_units=128):
+ def build_wavelet_cnn_lstm(input_shape, n_classes, dropout=0.3, lstm_units=64):
      from tensorflow import keras
      from models.base import classification_head

      inputs = keras.Input(shape=input_shape)
-     x = keras.layers.Conv1D(64, 3, padding="causal", activation="relu")(inputs)
+     x = keras.layers.Conv1D(32, 3, padding="causal", activation="relu")(inputs)
      x = keras.layers.BatchNormalization()(x)
      x = keras.layers.MaxPooling1D(2)(x)
-     x = keras.layers.Conv1D(32, 3, padding="causal", activation="relu")(x)
+     x = keras.layers.Conv1D(16, 3, padding="causal", activation="relu")(x)
      x = keras.layers.BatchNormalization()(x)
      x = keras.layers.Dropout(dropout)(x)
-     x = keras.layers.LSTM(lstm_units, dropout=dropout, recurrent_dropout=0.1)(x)
+     x = keras.layers.LSTM(lstm_units, dropout=dropout)(x)
      outputs = classification_head(x, n_classes, dropout)

      model = keras.Model(inputs, outputs, name="Approach1_Wavelet")
@@ -58,7 +66,7 @@ def build_wavelet_cnn_lstm(input_shape, n_classes, dropout=0.3, lstm_units=64):

  def train_approach1(
      X_train, y_train, X_val, y_val,
-     n_classes, epochs=100, batch_size=32, dropout=0.3, lstm_units=128,
+     n_classes, epochs=80, batch_size=64, dropout=0.3, lstm_units=64,
  ):
      from models.base import get_callbacks, compute_class_weights
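For intuition on _safe_wavelet_level: pywt.dwt_max_level returns the deepest decomposition whose filters still fit the signal, floor(log2(n / (filter_len - 1))) with filter length 8 for db4. A quick check across the app's lookback candidates:

    import pywt

    for lookback in (30, 45, 60):
        max_lvl = pywt.dwt_max_level(lookback, "db4")  # 2, 2, 3
        print(lookback, max_lvl, min(2, max_lvl))      # the min(2, ...) cap keeps level <= 2
    # Level <= 2 means at most level + 1 = 3 bands per feature after apply_wavelet_transform.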