Spaces:
Running
Running
P2SAMAPA committed on
Update data_manager.py
Browse files- data_manager.py +30 -33
data_manager.py
CHANGED
|
@@ -163,41 +163,38 @@ def smart_update_hf_dataset(new_data, token, force_upload=False):
|
|
| 163 |
if existing_df.index.tz is not None:
|
| 164 |
existing_df.index = existing_df.index.tz_localize(None)
|
| 165 |
|
| 166 |
-
# ── Step 1:
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
|
|
|
|
|
|
|
|
|
| 177 |
if new_etf_cols:
|
| 178 |
-
st.info(f"π New ETFs
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
]
|
| 191 |
-
|
| 192 |
-
combined[col] = full_history.reindex(full_index)[col]
|
| 193 |
-
st.success(
|
| 194 |
-
f"β
Full history fetched for {new_etf_cols}: "
|
| 195 |
-
f"{len(full_history)} rows "
|
| 196 |
-
f"({full_history.index[0].date()} β {full_history.index[-1].date()}), "
|
| 197 |
-
f"{len(cols_to_backfill)} columns backfilled"
|
| 198 |
)
|
| 199 |
-
|
| 200 |
-
|
| 201 |
|
| 202 |
# ── Step 3: decide whether to upload ─────────────────────────────────
|
| 203 |
new_rows = len(combined) - len(existing_df)
|
|
|
|
| 163 |
if existing_df.index.tz is not None:
|
| 164 |
existing_df.index = existing_df.index.tz_localize(None)
|
| 165 |
|
| 166 |
+
# ── Step 1: fetch FULL history for all target ETFs from 2008 ─────────
|
| 167 |
+
# Always fetch full history so new ETFs get complete backfill and
|
| 168 |
+
# existing ETFs stay current. This is the authoritative data source.
|
| 169 |
+
st.info("π‘ Fetching full ETF history from 2008...")
|
| 170 |
+
full_etf = fetch_etf_data(ETF_LIST, start_date="2008-01-01")
|
| 171 |
+
if full_etf.index.tz is not None:
|
| 172 |
+
full_etf.index = full_etf.index.tz_localize(None)
|
| 173 |
+
|
| 174 |
+
# Detect which ETFs are new (missing or all-NaN in existing dataset)
|
| 175 |
+
new_etf_cols = [
|
| 176 |
+
etf for etf in ETF_LIST
|
| 177 |
+
if f"{etf}_Ret" not in existing_df.columns
|
| 178 |
+
or existing_df[f"{etf}_Ret"].isna().mean() > 0.9
|
| 179 |
+
]
|
| 180 |
if new_etf_cols:
|
| 181 |
+
st.info(f"π New ETFs: {new_etf_cols} β will be fully backfilled")
|
| 182 |
+
|
| 183 |
+
# ── Step 2: build combined — full ETF history + existing macro ────────
|
| 184 |
+
# Start from existing, expand index to cover all ETF dates
|
| 185 |
+
full_index = existing_df.index.union(full_etf.index)
|
| 186 |
+
combined = existing_df.reindex(full_index)
|
| 187 |
+
# Write all ETF columns from full history (overwrites stale/NaN data)
|
| 188 |
+
for col in full_etf.columns:
|
| 189 |
+
combined[col] = full_etf.reindex(full_index)[col]
|
| 190 |
+
# Merge in any macro columns from new_data not already in combined
|
| 191 |
+
for col in new_data.columns:
|
| 192 |
+
if col not in full_etf.columns:
|
| 193 |
+
combined[col] = new_data.reindex(full_index)[col].combine_first(
|
| 194 |
+
combined.get(col, pd.Series(dtype=float))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 195 |
)
|
| 196 |
+
st.success(f"β
ETF history: {len(full_etf)} rows, {len(full_etf.columns)} columns"
|
| 197 |
+
+ (f" | New ETFs backfilled: {new_etf_cols}" if new_etf_cols else ""))
|
| 198 |
|
| 199 |
# ── Step 3: decide whether to upload ─────────────────────────────────
|
| 200 |
new_rows = len(combined) - len(existing_df)
|