P2-ETF-TREND-SUITE / data /updater.py
github-actions
Sync from GitHub
d763787
import os
import pandas as pd
import yfinance as yf
from huggingface_hub import HfApi
DATASET_PATH = "P2SAMAPA/etf_trend_data"
def update_market_data(df):
tickers = df["ticker"].unique()
all_new = []
for ticker in tickers:
last_date = df[df["ticker"] == ticker]["date"].max()
start_date = (last_date + pd.Timedelta(days=1)).strftime("%Y-%m-%d")
new_data = yf.download(ticker, start=start_date, progress=False)
if new_data.empty:
continue
new_data.reset_index(inplace=True)
new_data["ticker"] = ticker
new_data.rename(columns={
"Date": "date",
"Open": "open",
"High": "high",
"Low": "low",
"Close": "close",
"Adj Close": "adjusted_close",
"Volume": "volume"
}, inplace=True)
all_new.append(new_data)
if not all_new:
return df
new_df = pd.concat(all_new)
df = pd.concat([df, new_df])
df.drop_duplicates(subset=["date", "ticker"], inplace=True)
df.sort_values(["ticker", "date"], inplace=True)
df.to_parquet("updated.parquet")
api = HfApi()
api.upload_file(
path_or_fileobj="updated.parquet",
path_in_repo="data/train-00000-of-00001.parquet",
repo_id=DATASET_PATH,
repo_type="dataset",
token=os.getenv("HF_TOKEN")
)
return df