P2SAMAPA committed on
Commit
3e06976
·
unverified ·
1 Parent(s): 7605749

Update data_manager.py

Browse files
Files changed (1) hide show
  1. data_manager.py +63 -14
data_manager.py CHANGED
@@ -119,7 +119,7 @@ def fetch_macro_data_robust(start_date="2008-01-01"):
119
 
120
 
121
  def fetch_etf_data(etfs, start_date="2008-01-01"):
122
- """Fetch ETF price data and calculate returns"""
123
  try:
124
  etf_data = yf.download(
125
  etfs,
@@ -134,18 +134,58 @@ def fetch_etf_data(etfs, start_date="2008-01-01"):
134
  if etf_data.index.tz is not None:
135
  etf_data.index = etf_data.index.tz_localize(None)
136
 
137
- # Calculate daily returns
138
- etf_returns = etf_data.pct_change()
 
 
139
  etf_returns.columns = [f"{col}_Ret" for col in etf_returns.columns]
140
-
141
- # Calculate 20-day realized volatility
142
- etf_vol = etf_data.pct_change().rolling(20).std() * np.sqrt(252)
143
  etf_vol.columns = [f"{col}_Vol" for col in etf_vol.columns]
144
-
145
- result = pd.concat([etf_returns, etf_vol], axis=1)
146
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
  return result
148
-
149
  except Exception as e:
150
  st.error(f"❌ ETF fetch failed: {e}")
151
  return pd.DataFrame()
@@ -323,16 +363,25 @@ def get_data(start_year, force_refresh=False, clean_hf_dataset=False):
323
  if not etf_data.empty and not macro_data.empty:
324
  df = pd.concat([etf_data, macro_data], axis=1)
325
 
326
- # Feature Engineering: Z-Scores
327
- macro_cols = ['VIX', 'DXY', 'COPPER', 'GOLD', 'HY_Spread', 'T10Y2Y', 'T10Y3M',
328
  'VIX_Spot', 'VIX_3M', 'VIX_Term_Slope']
329
-
330
  for col in df.columns:
331
  if any(m in col for m in macro_cols) or '_Vol' in col:
332
  rolling_mean = df[col].rolling(20, min_periods=5).mean()
333
- rolling_std = df[col].rolling(20, min_periods=5).std()
334
  z_col = f"{col}_Z"
335
  df[z_col] = (df[col] - rolling_mean) / (rolling_std + 1e-9)
 
 
 
 
 
 
 
 
 
336
 
337
  # Add regime features
338
  st.write("🎯 **Adding Regime Detection Features...**")
 
119
 
120
 
121
  def fetch_etf_data(etfs, start_date="2008-01-01"):
122
+ """Fetch ETF price data and calculate returns + momentum features"""
123
  try:
124
  etf_data = yf.download(
125
  etfs,
 
134
  if etf_data.index.tz is not None:
135
  etf_data.index = etf_data.index.tz_localize(None)
136
 
137
+ daily_rets = etf_data.pct_change()
138
+
139
+ # ── Daily returns (targets will be built from these) ─────────────────
140
+ etf_returns = daily_rets.copy()
141
  etf_returns.columns = [f"{col}_Ret" for col in etf_returns.columns]
142
+
143
+ # ── 20-day realized volatility ────────────────────────────────────────
144
+ etf_vol = daily_rets.rolling(20).std() * np.sqrt(252)
145
  etf_vol.columns = [f"{col}_Vol" for col in etf_vol.columns]
146
+
147
+ # ── Momentum features: rolling returns over multiple windows ──────────
148
+ momentum_frames = []
149
+ for window in [5, 10, 21, 63]: # 1W, 2W, 1M, 3M
150
+ mom = etf_data.pct_change(window)
151
+ mom.columns = [f"{col}_Mom{window}d" for col in mom.columns]
152
+ momentum_frames.append(mom)
153
+
154
+ # ── Relative strength vs SPY ──────────────────────────────────────────
155
+ rel_frames = []
156
+ if 'SPY' in etf_data.columns:
157
+ spy_ret = etf_data['SPY'].pct_change(21)
158
+ for col in etf_data.columns:
159
+ if col != 'SPY':
160
+ rel = etf_data[col].pct_change(21) - spy_ret
161
+ rel_frames.append(rel.rename(f"{col}_RelSPY21d"))
162
+
163
+ # ── Cross-sectional momentum rank (1=worst, 5=best among universe) ───
164
+ target_etfs_only = [c for c in etf_data.columns
165
+ if c not in ['SPY', 'AGG']]
166
+ rank_frames = []
167
+ for window in [21, 63]:
168
+ mom_w = etf_data[target_etfs_only].pct_change(window)
169
+ ranked = mom_w.rank(axis=1, pct=True)
170
+ ranked.columns = [f"{col}_Rank{window}d" for col in ranked.columns]
171
+ rank_frames.append(ranked)
172
+
173
+ # ── Recent trend: 5d and 10d price change ─────────────────────────────
174
+ trend_frames = []
175
+ for window in [5, 10]:
176
+ trend = etf_data.pct_change(window)
177
+ trend.columns = [f"{col}_Trend{window}d" for col in trend.columns]
178
+ trend_frames.append(trend)
179
+
180
+ result = pd.concat(
181
+ [etf_returns, etf_vol] + momentum_frames +
182
+ (rel_frames if rel_frames else []) +
183
+ rank_frames + trend_frames,
184
+ axis=1
185
+ )
186
+
187
  return result
188
+
189
  except Exception as e:
190
  st.error(f"❌ ETF fetch failed: {e}")
191
  return pd.DataFrame()
 
363
  if not etf_data.empty and not macro_data.empty:
364
  df = pd.concat([etf_data, macro_data], axis=1)
365
 
366
+ # Feature Engineering: Z-Scores for macro + vol columns
367
+ macro_cols = ['VIX', 'DXY', 'COPPER', 'GOLD', 'HY_Spread', 'T10Y2Y', 'T10Y3M',
368
  'VIX_Spot', 'VIX_3M', 'VIX_Term_Slope']
369
+
370
  for col in df.columns:
371
  if any(m in col for m in macro_cols) or '_Vol' in col:
372
  rolling_mean = df[col].rolling(20, min_periods=5).mean()
373
+ rolling_std = df[col].rolling(20, min_periods=5).std()
374
  z_col = f"{col}_Z"
375
  df[z_col] = (df[col] - rolling_mean) / (rolling_std + 1e-9)
376
+
377
+ # Z-score the momentum/rank/trend features too so they're on same scale
378
+ mom_pattern_cols = [c for c in df.columns if any(
379
+ tag in c for tag in ['_Mom', '_RelSPY', '_Rank', '_Trend']
380
+ )]
381
+ for col in mom_pattern_cols:
382
+ rolling_mean = df[col].rolling(60, min_periods=10).mean()
383
+ rolling_std = df[col].rolling(60, min_periods=10).std()
384
+ df[f"{col}_Z"] = (df[col] - rolling_mean) / (rolling_std + 1e-9)
385
 
386
  # Add regime features
387
  st.write("🎯 **Adding Regime Detection Features...**")