P2SAMAPA committed on
Commit
2c5ef71
·
unverified ·
1 Parent(s): 7f02f24

Update data_manager.py

Browse files
Files changed (1) hide show
  1. data_manager.py +4 -4
data_manager.py CHANGED
@@ -191,7 +191,7 @@ def fetch_etf_data(etfs, start_date="2008-01-01"):
191
  return pd.DataFrame()
192
 
193
 
194
- def smart_update_hf_dataset(new_data, token):
195
  """Smart update: Only uploads if new data exists or gaps are filled.
196
 
197
  Handles new ETFs added to ETF_LIST: detects columns present in new_data
@@ -265,7 +265,7 @@ def smart_update_hf_dataset(new_data, token):
265
 
266
  # Force upload if new ETFs were backfilled (filled_gaps may undercount
267
  # because existing_df was reindexed to match the new date union)
268
- needs_update = new_rows > 0 or filled_gaps > 0 or len(new_etf_cols) > 0
269
 
270
  if needs_update:
271
  combined.index.name = "Date"
@@ -278,7 +278,7 @@ def smart_update_hf_dataset(new_data, token):
278
  repo_id=REPO_ID,
279
  repo_type="dataset",
280
  token=token,
281
- commit_message=f"Update: {get_est_time().strftime('%Y-%m-%d %H:%M EST')} | +{new_rows} rows, filled {filled_gaps} gaps" + (f", backfilled {new_etf_cols}" if new_etf_cols else "")
282
  )
283
 
284
  st.success(f"✅ Dataset updated: +{new_rows} rows, filled {filled_gaps} gaps")
@@ -397,7 +397,7 @@ def get_data(start_year, force_refresh=False, clean_hf_dataset=False):
397
  if not etf_data.empty and not macro_data.empty:
398
  new_df = pd.concat([etf_data, macro_data], axis=1)
399
  token = os.getenv("HF_TOKEN")
400
- df = smart_update_hf_dataset(new_df, token)
401
 
402
  # Fetch fresh if still empty
403
  if df.empty:
 
191
  return pd.DataFrame()
192
 
193
 
194
+ def smart_update_hf_dataset(new_data, token, force_upload=False):
195
  """Smart update: Only uploads if new data exists or gaps are filled.
196
 
197
  Handles new ETFs added to ETF_LIST: detects columns present in new_data
 
265
 
266
  # Force upload if new ETFs were backfilled (filled_gaps may undercount
267
  # because existing_df was reindexed to match the new date union)
268
+ needs_update = force_upload or new_rows > 0 or filled_gaps > 0 or len(new_etf_cols) > 0
269
 
270
  if needs_update:
271
  combined.index.name = "Date"
 
278
  repo_id=REPO_ID,
279
  repo_type="dataset",
280
  token=token,
281
+ commit_message=("FORCE " if force_upload else "") + f"Update: {get_est_time().strftime('%Y-%m-%d %H:%M EST')} | +{new_rows} rows, filled {filled_gaps} gaps" + (f", backfilled {new_etf_cols}" if new_etf_cols else ""),
282
  )
283
 
284
  st.success(f"✅ Dataset updated: +{new_rows} rows, filled {filled_gaps} gaps")
 
397
  if not etf_data.empty and not macro_data.empty:
398
  new_df = pd.concat([etf_data, macro_data], axis=1)
399
  token = os.getenv("HF_TOKEN")
400
+ df = smart_update_hf_dataset(new_df, token, force_upload=force_refresh)
401
 
402
  # Fetch fresh if still empty
403
  if df.empty: