File size: 2,742 Bytes
3cfab16
4b5906f
 
 
 
 
01aa02e
4b5906f
 
0f6cd86
4b5906f
 
01aa02e
4b5906f
 
 
01aa02e
4b5906f
 
 
 
 
 
 
 
01aa02e
4b5906f
 
 
 
 
 
 
 
 
 
 
 
 
01aa02e
4b5906f
 
 
 
3cfab16
4b5906f
 
ff33c90
4b5906f
 
 
 
 
 
5a922dd
4b5906f
 
 
5a922dd
4b5906f
 
 
 
 
01aa02e
4b5906f
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import logging
import os

import pandas as pd
import pandas_datareader.data as web
import streamlit as st
import yfinance as yf
from huggingface_hub import hf_hub_download, HfApi

# --- GLOBAL CONSTANTS ---
# First ticker group downloaded from yfinance (every symbol starts with "X").
X_EQUITY_TICKERS = [
    "XLK", "XLY", "XLP", "XLE", "XLV", "XLI", "XLB", "XLRE", "XLU",
    "XLC", "XLF", "XBI", "XME", "XOP", "XHB", "XSD", "XRT", "XPH",
    "XES", "XAR", "XHS", "XHE", "XSW", "XTN", "XTL", "XNTK", "XITK",
]
# Second ticker group downloaded alongside the first.
# NOTE(review): the "FI" prefix presumably means fixed income, but the list
# also holds symbols such as GLD/SLV/VNQ - confirm the intended meaning.
FI_TICKERS = [
    "TLT", "IEF", "TIP", "GLD", "SLV", "VGIT", "VCLT",
    "VCIT", "HYG", "PFF", "MBB", "VNQ", "LQD", "AGG",
]
# Hugging Face dataset repository that stores the cached CSV.
REPO_ID = "P2SAMAPA/etf_trend_data"
# Name of the CSV, used both for the local copy and as the path in the repo.
FILENAME = "market_data.csv"

def get_safe_token():
    """Return the Hugging Face token, or ``None`` when none is configured.

    ``HF_TOKEN`` is read from Streamlit secrets first. If that lookup fails
    for any reason, the ``HF_TOKEN`` environment variable is used instead.

    Returns:
        The token value, or ``None`` if neither source provides one.
    """
    try:
        return st.secrets["HF_TOKEN"]
    except Exception:
        # Kept broad on purpose: the secrets lookup can fail in several ways
        # (missing key, no secrets file at all). Unlike a bare ``except`` this
        # still lets KeyboardInterrupt and SystemExit propagate.
        return os.getenv("HF_TOKEN")

def load_from_hf():
    """Load the cached market-data CSV from the Hugging Face dataset repo.

    Returns:
        The CSV parsed into a DataFrame (first column as a date-parsed index)
        with missing values forward-filled, or ``None`` when no token is
        available or the download/parse fails.
    """
    token = get_safe_token()
    if not token:
        return None
    try:
        path = hf_hub_download(
            repo_id=REPO_ID,
            filename=FILENAME,
            repo_type="dataset",
            token=token,
        )
        df = pd.read_csv(path, index_col=0, parse_dates=True)
        return df.ffill()
    except Exception:
        # Callers treat None as "no cached data"; record the reason instead
        # of discarding it so failures can be diagnosed.
        logging.getLogger(__name__).warning(
            "Could not load %s from %s", FILENAME, REPO_ID, exc_info=True
        )
        return None

def seed_dataset_from_scratch():
    """Build the full price history from 2008-01-01 and publish it.

    Downloads every configured ticker (plus SPY and AGG) from yfinance, adds
    a ``SOFR_ANNUAL`` column sourced from FRED, writes the result to
    ``FILENAME`` and uploads it via ``upload_to_hf``.

    Returns:
        The assembled DataFrame, sorted by index and forward-filled. If the
        price download comes back empty, the empty frame is returned and
        nothing is written or uploaded.
    """
    log = logging.getLogger(__name__)
    # sorted() gives a deterministic request order; set iteration order can
    # differ between interpreter runs.
    tickers = sorted(set(X_EQUITY_TICKERS + FI_TICKERS + ["SPY", "AGG"]))
    data = yf.download(tickers, start="2008-01-01", progress=False)
    price_field = 'Adj Close' if 'Adj Close' in data.columns else 'Close'
    # Copy so the column assignment below does not write into a slice of the
    # raw download frame.
    master_df = data[price_field].copy()

    if master_df.empty:
        # Do not overwrite the stored dataset with an empty download.
        log.warning("Price download returned no rows; dataset not seeded")
        return master_df

    try:
        sofr = web.DataReader('SOFR', 'fred', start="2008-01-01").ffill()
        # DataReader returns a one-column frame; take that column explicitly.
        # Dividing by 100 presumably converts percent to a decimal rate.
        master_df['SOFR_ANNUAL'] = sofr.iloc[:, 0] / 100
    except Exception:
        log.warning("SOFR download failed; using constant fallback rate", exc_info=True)
        master_df['SOFR_ANNUAL'] = 0.045

    master_df = master_df.sort_index().ffill()
    master_df.to_csv(FILENAME)
    upload_to_hf(FILENAME)
    return master_df

def sync_incremental_data(df):
    """Append price rows newer than the last date in ``df`` and publish them.

    Args:
        df: Existing price history indexed by date, or ``None``.

    Returns:
        A ``(frame, status)`` tuple. ``status`` is one of:

        * ``"error"`` - ``df`` is ``None`` or has no data to extend.
        * ``"already_current"`` - the last stored date is today or later.
        * ``"no_new_data_yet"`` - the download had no usable rows.
        * ``"success"`` - new rows were merged, saved and uploaded; ``frame``
          is the combined history.
        * ``"api_failure"`` - the download or merge raised; ``frame`` is the
          unchanged input.
    """
    if df is None:
        return None, "error"
    if df.empty:
        # An empty frame has no last date; index.max() would be NaT and
        # .date() would raise outside the try block below.
        return df, "error"

    last_date = pd.to_datetime(df.index.max()).date()
    today = pd.Timestamp.now().date()
    if last_date >= today:
        return df, "already_current"

    sync_start = last_date + pd.Timedelta(days=1)
    # sorted() keeps the request order deterministic across runs.
    tickers = sorted(set(X_EQUITY_TICKERS + FI_TICKERS + ["SPY", "AGG"]))
    try:
        new_data_raw = yf.download(tickers, start=sync_start, progress=False)
        if new_data_raw is None or new_data_raw.empty:
            return df, "no_new_data_yet"

        price_field = 'Adj Close' if 'Adj Close' in new_data_raw.columns else 'Close'
        # Drop rows with no price for any ticker; otherwise the ffill below
        # would turn them into copies of the previous day's prices.
        new_data = new_data_raw[price_field].dropna(how="all")
        if new_data.empty:
            return df, "no_new_data_yet"

        combined = pd.concat([df, new_data]).sort_index()
        # When a date appears twice, the freshly downloaded row wins.
        combined = combined[~combined.index.duplicated(keep='last')].ffill()

        combined.to_csv(FILENAME)
        upload_to_hf(FILENAME)
        return combined, "success"
    except Exception:
        logging.getLogger(__name__).warning(
            "Incremental sync from %s failed", sync_start, exc_info=True
        )
        return df, "api_failure"

def upload_to_hf(path):
    """Upload a local file to the dataset repo as ``FILENAME`` (best effort).

    Does nothing when no token is available. Upload errors are logged and
    never raised, so a failed upload does not interrupt the caller.

    Args:
        path: Local path of the file to upload.
    """
    token = get_safe_token()
    if not token:
        return
    try:
        HfApi().upload_file(
            path_or_fileobj=path,
            path_in_repo=FILENAME,
            repo_id=REPO_ID,
            repo_type="dataset",
            token=token,
        )
    except Exception:
        # Still best effort, but leave a trace instead of failing silently.
        logging.getLogger(__name__).warning(
            "Upload of %s to %s failed", path, REPO_ID, exc_info=True
        )