Spaces:
Running
Running
P2SAMAPA committed on
Update data_manager.py
Browse files- data_manager.py +63 -14
data_manager.py
CHANGED
|
@@ -119,7 +119,7 @@ def fetch_macro_data_robust(start_date="2008-01-01"):
|
|
| 119 |
|
| 120 |
|
| 121 |
def fetch_etf_data(etfs, start_date="2008-01-01"):
|
| 122 |
-
"""Fetch ETF price data and calculate returns"""
|
| 123 |
try:
|
| 124 |
etf_data = yf.download(
|
| 125 |
etfs,
|
|
@@ -134,18 +134,58 @@ def fetch_etf_data(etfs, start_date="2008-01-01"):
|
|
| 134 |
if etf_data.index.tz is not None:
|
| 135 |
etf_data.index = etf_data.index.tz_localize(None)
|
| 136 |
|
| 137 |
-
|
| 138 |
-
|
|
|
|
|
|
|
| 139 |
etf_returns.columns = [f"{col}_Ret" for col in etf_returns.columns]
|
| 140 |
-
|
| 141 |
-
#
|
| 142 |
-
etf_vol =
|
| 143 |
etf_vol.columns = [f"{col}_Vol" for col in etf_vol.columns]
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 147 |
return result
|
| 148 |
-
|
| 149 |
except Exception as e:
|
| 150 |
st.error(f"β ETF fetch failed: {e}")
|
| 151 |
return pd.DataFrame()
|
|
@@ -323,16 +363,25 @@ def get_data(start_year, force_refresh=False, clean_hf_dataset=False):
|
|
| 323 |
if not etf_data.empty and not macro_data.empty:
|
| 324 |
df = pd.concat([etf_data, macro_data], axis=1)
|
| 325 |
|
| 326 |
-
# Feature Engineering: Z-Scores
|
| 327 |
-
macro_cols = ['VIX', 'DXY', 'COPPER', 'GOLD', 'HY_Spread', 'T10Y2Y', 'T10Y3M',
|
| 328 |
'VIX_Spot', 'VIX_3M', 'VIX_Term_Slope']
|
| 329 |
-
|
| 330 |
for col in df.columns:
|
| 331 |
if any(m in col for m in macro_cols) or '_Vol' in col:
|
| 332 |
rolling_mean = df[col].rolling(20, min_periods=5).mean()
|
| 333 |
-
rolling_std
|
| 334 |
z_col = f"{col}_Z"
|
| 335 |
df[z_col] = (df[col] - rolling_mean) / (rolling_std + 1e-9)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 336 |
|
| 337 |
# Add regime features
|
| 338 |
st.write("π― **Adding Regime Detection Features...**")
|
|
|
|
| 119 |
|
| 120 |
|
| 121 |
def fetch_etf_data(etfs, start_date="2008-01-01"):
|
| 122 |
+
"""Fetch ETF price data and calculate returns + momentum features"""
|
| 123 |
try:
|
| 124 |
etf_data = yf.download(
|
| 125 |
etfs,
|
|
|
|
| 134 |
if etf_data.index.tz is not None:
|
| 135 |
etf_data.index = etf_data.index.tz_localize(None)
|
| 136 |
|
| 137 |
+
daily_rets = etf_data.pct_change()
|
| 138 |
+
|
| 139 |
+
# ── Daily returns (targets will be built from these) ─────────────────
|
| 140 |
+
etf_returns = daily_rets.copy()
|
| 141 |
etf_returns.columns = [f"{col}_Ret" for col in etf_returns.columns]
|
| 142 |
+
|
| 143 |
+
# ── 20-day realized volatility ────────────────────────────────────────
|
| 144 |
+
etf_vol = daily_rets.rolling(20).std() * np.sqrt(252)
|
| 145 |
etf_vol.columns = [f"{col}_Vol" for col in etf_vol.columns]
|
| 146 |
+
|
| 147 |
+
# ── Momentum features: rolling returns over multiple windows ──────────
|
| 148 |
+
momentum_frames = []
|
| 149 |
+
for window in [5, 10, 21, 63]: # 1W, 2W, 1M, 3M
|
| 150 |
+
mom = etf_data.pct_change(window)
|
| 151 |
+
mom.columns = [f"{col}_Mom{window}d" for col in mom.columns]
|
| 152 |
+
momentum_frames.append(mom)
|
| 153 |
+
|
| 154 |
+
# ── Relative strength vs SPY ──────────────────────────────────────────
|
| 155 |
+
rel_frames = []
|
| 156 |
+
if 'SPY' in etf_data.columns:
|
| 157 |
+
spy_ret = etf_data['SPY'].pct_change(21)
|
| 158 |
+
for col in etf_data.columns:
|
| 159 |
+
if col != 'SPY':
|
| 160 |
+
rel = etf_data[col].pct_change(21) - spy_ret
|
| 161 |
+
rel_frames.append(rel.rename(f"{col}_RelSPY21d"))
|
| 162 |
+
|
| 163 |
+
# ── Cross-sectional momentum rank (1=worst, 5=best among universe) ───
|
| 164 |
+
target_etfs_only = [c for c in etf_data.columns
|
| 165 |
+
if c not in ['SPY', 'AGG']]
|
| 166 |
+
rank_frames = []
|
| 167 |
+
for window in [21, 63]:
|
| 168 |
+
mom_w = etf_data[target_etfs_only].pct_change(window)
|
| 169 |
+
ranked = mom_w.rank(axis=1, pct=True)
|
| 170 |
+
ranked.columns = [f"{col}_Rank{window}d" for col in ranked.columns]
|
| 171 |
+
rank_frames.append(ranked)
|
| 172 |
+
|
| 173 |
+
# ── Recent trend: 5d and 10d price change ─────────────────────────────
|
| 174 |
+
trend_frames = []
|
| 175 |
+
for window in [5, 10]:
|
| 176 |
+
trend = etf_data.pct_change(window)
|
| 177 |
+
trend.columns = [f"{col}_Trend{window}d" for col in trend.columns]
|
| 178 |
+
trend_frames.append(trend)
|
| 179 |
+
|
| 180 |
+
result = pd.concat(
|
| 181 |
+
[etf_returns, etf_vol] + momentum_frames +
|
| 182 |
+
(rel_frames if rel_frames else []) +
|
| 183 |
+
rank_frames + trend_frames,
|
| 184 |
+
axis=1
|
| 185 |
+
)
|
| 186 |
+
|
| 187 |
return result
|
| 188 |
+
|
| 189 |
except Exception as e:
|
| 190 |
st.error(f"β ETF fetch failed: {e}")
|
| 191 |
return pd.DataFrame()
|
|
|
|
| 363 |
if not etf_data.empty and not macro_data.empty:
|
| 364 |
df = pd.concat([etf_data, macro_data], axis=1)
|
| 365 |
|
| 366 |
+
# Feature Engineering: Z-Scores for macro + vol columns
|
| 367 |
+
macro_cols = ['VIX', 'DXY', 'COPPER', 'GOLD', 'HY_Spread', 'T10Y2Y', 'T10Y3M',
|
| 368 |
'VIX_Spot', 'VIX_3M', 'VIX_Term_Slope']
|
| 369 |
+
|
| 370 |
for col in df.columns:
|
| 371 |
if any(m in col for m in macro_cols) or '_Vol' in col:
|
| 372 |
rolling_mean = df[col].rolling(20, min_periods=5).mean()
|
| 373 |
+
rolling_std = df[col].rolling(20, min_periods=5).std()
|
| 374 |
z_col = f"{col}_Z"
|
| 375 |
df[z_col] = (df[col] - rolling_mean) / (rolling_std + 1e-9)
|
| 376 |
+
|
| 377 |
+
# Z-score the momentum/rank/trend features too so they're on same scale
|
| 378 |
+
mom_pattern_cols = [c for c in df.columns if any(
|
| 379 |
+
tag in c for tag in ['_Mom', '_RelSPY', '_Rank', '_Trend']
|
| 380 |
+
)]
|
| 381 |
+
for col in mom_pattern_cols:
|
| 382 |
+
rolling_mean = df[col].rolling(60, min_periods=10).mean()
|
| 383 |
+
rolling_std = df[col].rolling(60, min_periods=10).std()
|
| 384 |
+
df[f"{col}_Z"] = (df[col] - rolling_mean) / (rolling_std + 1e-9)
|
| 385 |
|
| 386 |
# Add regime features
|
| 387 |
st.write("π― **Adding Regime Detection Features...**")
|