import pandas as pd
import pandas_datareader.data as web
import yfinance as yf
from huggingface_hub import hf_hub_download, HfApi
import os
import streamlit as st
# --- GLOBAL CONSTANTS ---
# Equity ETF tickers tracked by the model (sector/industry SPDR-style funds).
X_EQUITY_TICKERS = ["XLK", "XLY", "XLP", "XLE", "XLV", "XLI", "XLB", "XLRE", "XLU", "XLC", "XLF", "XBI", "XME", "XOP", "XHB", "XSD", "XRT", "XPH", "XES", "XAR", "XHS", "XHE", "XSW", "XTN", "XTL", "XNTK", "XITK"]
# Fixed-income plus commodity/REIT proxy ETFs tracked alongside the equities.
FI_TICKERS = ["TLT", "IEF", "TIP", "GLD", "SLV", "VGIT", "VCLT", "VCIT", "HYG", "PFF", "MBB", "VNQ", "LQD", "AGG"]
# Hugging Face dataset repo used as a persistent cache for downloaded prices.
REPO_ID = "P2SAMAPA/etf_trend_data"
# CSV file name used both for the local copy and inside the dataset repo.
FILENAME = "market_data.csv"
def get_safe_token():
    """Return the Hugging Face API token, or None if none is configured.

    Prefers Streamlit secrets (``st.secrets["HF_TOKEN"]``) and falls back to
    the ``HF_TOKEN`` environment variable.
    """
    try:
        return st.secrets["HF_TOKEN"]
    except Exception:
        # st.secrets raises when no secrets file / key exists (e.g.
        # FileNotFoundError or KeyError) — fall back to the environment.
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # are not swallowed.
        return os.getenv("HF_TOKEN")
def load_from_hf():
    """Download the cached market-data CSV from the HF dataset repo.

    Returns
    -------
    pandas.DataFrame or None
        Forward-filled price history indexed by parsed dates, or None when no
        token is available or the download/parse fails — the caller then
        rebuilds the dataset from scratch.
    """
    token = get_safe_token()
    if not token:
        return None
    try:
        path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME,
                               repo_type="dataset", token=token)
        df = pd.read_csv(path, index_col=0, parse_dates=True)
        return df.ffill()
    except Exception:
        # Best-effort cache load: any failure (network, missing repo file,
        # malformed CSV) simply signals the caller to reseed. Narrowed from a
        # bare `except:` so interpreter-exit signals propagate.
        return None
def seed_dataset_from_scratch():
    """Build the full price history from 2008 onward and push it to HF.

    Downloads closes for every tracked ETF plus SPY/AGG via yfinance, attaches
    the FRED SOFR rate as an annualized decimal (``SOFR_ANNUAL``), forward-
    fills gaps, writes FILENAME locally and uploads it to the dataset repo.

    Returns
    -------
    pandas.DataFrame
        The assembled, date-sorted, forward-filled master frame.
    """
    # sorted() makes the download column order deterministic across runs.
    tickers = sorted(set(X_EQUITY_TICKERS + FI_TICKERS + ["SPY", "AGG"]))
    data = yf.download(tickers, start="2008-01-01", progress=False)
    # Newer yfinance defaults to auto_adjust=True and omits 'Adj Close';
    # in that case 'Close' is already the adjusted series.
    master_df = data['Adj Close'] if 'Adj Close' in data.columns else data['Close']
    try:
        sofr = web.DataReader('SOFR', 'fred', start="2008-01-01").ffill()
        # DataReader returns a one-column DataFrame; squeeze it to a Series so
        # the column assignment aligns cleanly on the date index across pandas
        # versions. FRED quotes SOFR in percent, hence / 100.
        master_df['SOFR_ANNUAL'] = sofr.squeeze() / 100
    except Exception:
        # FRED unavailable: fall back to a constant ~4.5% annual rate rather
        # than failing the whole seed. Narrowed from a bare `except:`.
        master_df['SOFR_ANNUAL'] = 0.045
    master_df = master_df.sort_index().ffill()
    master_df.to_csv(FILENAME)
    upload_to_hf(FILENAME)
    return master_df
def sync_incremental_data(df):
    """Append trading days newer than ``df``'s last row and re-upload.

    Parameters
    ----------
    df : pandas.DataFrame or None
        Existing price history with a date-like index.

    Returns
    -------
    (pandas.DataFrame or None, str)
        The (possibly extended) frame and a status string:
        'error' (df was None), 'already_current', 'no_new_data_yet',
        'success', or 'api_failure' (download raised; original df returned).
    """
    if df is None:
        return None, "error"
    last_date = pd.to_datetime(df.index.max()).date()
    today = pd.Timestamp.now().date()
    if last_date >= today:
        return df, "already_current"
    # Start the fetch on the first day NOT already present in the cache.
    sync_start = last_date + pd.Timedelta(days=1)
    tickers = sorted(set(X_EQUITY_TICKERS + FI_TICKERS + ["SPY", "AGG"]))
    try:
        new_data_raw = yf.download(tickers, start=sync_start, progress=False)
        if new_data_raw is None or new_data_raw.empty:
            # Weekend/holiday or pre-market: nothing published yet.
            return df, "no_new_data_yet"
        # Newer yfinance (auto_adjust=True) omits 'Adj Close'; 'Close' is then
        # already adjusted — mirror seed_dataset_from_scratch's handling.
        new_data = new_data_raw['Adj Close'] if 'Adj Close' in new_data_raw.columns else new_data_raw['Close']
        combined = pd.concat([df, new_data]).sort_index()
        # keep='last' lets a freshly downloaded row replace a stale cached one
        # for the same date, then forward-fill any remaining gaps.
        combined = combined[~combined.index.duplicated(keep='last')].ffill()
        combined.to_csv(FILENAME)
        upload_to_hf(FILENAME)
        return combined, "success"
    except Exception:
        # Network/API failure: keep serving the cached frame unchanged.
        # Narrowed from a bare `except:` so Ctrl-C still interrupts.
        return df, "api_failure"
def upload_to_hf(path):
    """Best-effort upload of ``path`` to the HF dataset repo as FILENAME.

    Does nothing when no token is configured, and deliberately swallows upload
    failures — the app keeps working from the local CSV copy either way.
    """
    token = get_safe_token()
    if not token:
        # No credentials: skip the upload entirely (guard clause instead of
        # nesting the whole body under `if token:`).
        return
    api = HfApi()
    try:
        api.upload_file(path_or_fileobj=path, path_in_repo=FILENAME,
                        repo_id=REPO_ID, repo_type="dataset", token=token)
    except Exception:
        # Intentional best-effort: an upload failure must not crash the app.
        # Narrowed from a bare `except:` so exit signals propagate.
        pass