Spaces:
Running
Running
P2SAMAPA committed on
Update data_manager.py
Browse files - data_manager.py +4 -4
data_manager.py
CHANGED
|
@@ -191,7 +191,7 @@ def fetch_etf_data(etfs, start_date="2008-01-01"):
|
|
| 191 |
return pd.DataFrame()
|
| 192 |
|
| 193 |
|
| 194 |
-
def smart_update_hf_dataset(new_data, token):
|
| 195 |
"""Smart update: Only uploads if new data exists or gaps are filled.
|
| 196 |
|
| 197 |
Handles new ETFs added to ETF_LIST: detects columns present in new_data
|
|
@@ -265,7 +265,7 @@ def smart_update_hf_dataset(new_data, token):
|
|
| 265 |
|
| 266 |
# Force upload if new ETFs were backfilled (filled_gaps may undercount
|
| 267 |
# because existing_df was reindexed to match the new date union)
|
| 268 |
-
needs_update = new_rows > 0 or filled_gaps > 0 or len(new_etf_cols) > 0
|
| 269 |
|
| 270 |
if needs_update:
|
| 271 |
combined.index.name = "Date"
|
|
@@ -278,7 +278,7 @@ def smart_update_hf_dataset(new_data, token):
|
|
| 278 |
repo_id=REPO_ID,
|
| 279 |
repo_type="dataset",
|
| 280 |
token=token,
|
| 281 |
-
commit_message=f"Update: {get_est_time().strftime('%Y-%m-%d %H:%M EST')} | +{new_rows} rows, filled {filled_gaps} gaps" + (f", backfilled {new_etf_cols}" if new_etf_cols else "")
|
| 282 |
)
|
| 283 |
|
| 284 |
st.success(f"✅ Dataset updated: +{new_rows} rows, filled {filled_gaps} gaps")
|
|
@@ -397,7 +397,7 @@ def get_data(start_year, force_refresh=False, clean_hf_dataset=False):
|
|
| 397 |
if not etf_data.empty and not macro_data.empty:
|
| 398 |
new_df = pd.concat([etf_data, macro_data], axis=1)
|
| 399 |
token = os.getenv("HF_TOKEN")
|
| 400 |
-
df = smart_update_hf_dataset(new_df, token)
|
| 401 |
|
| 402 |
# Fetch fresh if still empty
|
| 403 |
if df.empty:
|
|
|
|
| 191 |
return pd.DataFrame()
|
| 192 |
|
| 193 |
|
| 194 |
+
def smart_update_hf_dataset(new_data, token, force_upload=False):
|
| 195 |
"""Smart update: Only uploads if new data exists or gaps are filled.
|
| 196 |
|
| 197 |
Handles new ETFs added to ETF_LIST: detects columns present in new_data
|
|
|
|
| 265 |
|
| 266 |
# Force upload if new ETFs were backfilled (filled_gaps may undercount
|
| 267 |
# because existing_df was reindexed to match the new date union)
|
| 268 |
+
needs_update = force_upload or new_rows > 0 or filled_gaps > 0 or len(new_etf_cols) > 0
|
| 269 |
|
| 270 |
if needs_update:
|
| 271 |
combined.index.name = "Date"
|
|
|
|
| 278 |
repo_id=REPO_ID,
|
| 279 |
repo_type="dataset",
|
| 280 |
token=token,
|
| 281 |
+
commit_message=("FORCE " if force_upload else "") + f"Update: {get_est_time().strftime('%Y-%m-%d %H:%M EST')} | +{new_rows} rows, filled {filled_gaps} gaps" + (f", backfilled {new_etf_cols}" if new_etf_cols else ""),
|
| 282 |
)
|
| 283 |
|
| 284 |
st.success(f"✅ Dataset updated: +{new_rows} rows, filled {filled_gaps} gaps")
|
|
|
|
| 397 |
if not etf_data.empty and not macro_data.empty:
|
| 398 |
new_df = pd.concat([etf_data, macro_data], axis=1)
|
| 399 |
token = os.getenv("HF_TOKEN")
|
| 400 |
+
df = smart_update_hf_dataset(new_df, token, force_upload=force_refresh)
|
| 401 |
|
| 402 |
# Fetch fresh if still empty
|
| 403 |
if df.empty:
|