|
|
|
|
|
|
|
|
import yfinance as yf_lib |
|
|
import pandas as pd |
|
|
import argparse |
|
|
import os |
|
|
from datetime import datetime, timedelta |
|
|
from pandas_datareader import data as pdr |
|
|
|
|
|
|
|
|
|
|
|
# Tickers whose closing prices are downloaded from Yahoo Finance.
ASSETS = ['AAPL', 'MSFT', 'SPY', 'TLT', 'BTC-USD']

# FRED series id -> column name used for that series in the output frame.
# Insertion order is the order in which the series are requested.
FRED_IDS = {
    'DFF': 'Federal Funds Rate',
    'CPIAUCSL': 'CPI',
    'VIXCLS': 'VIX',
}
|
|
|
|
|
|
|
|
def fetch_market_data(start_date, end_date, filename):
    """Download asset prices and FRED macro series, merge them, and save to CSV.

    Closing prices for every ticker in ``ASSETS`` are downloaded with
    yfinance, and the series listed in ``FRED_IDS`` are read from FRED via
    pandas-datareader. Each macro series is aligned to the price dates by
    carrying its most recent observation forward. Rows outside
    ``start_date``..``end_date`` and rows that still contain missing values
    are dropped, and the result is optionally written to ``filename``.

    Args:
        start_date: Start of the requested period, as a ``YYYY-MM-DD`` string.
        end_date: End of the requested period, as a ``YYYY-MM-DD`` string
            (passed to both data sources as their ``end`` argument).
        filename: Path of the CSV file to write. A falsy value (``None`` or
            ``""``) skips saving. Missing parent directories are created.

    Returns:
        The combined DataFrame indexed by date, or ``None`` when the price
        download fails, returns no usable rows, or no rows survive cleaning.
        A FRED failure is not fatal: the result then holds price columns only.
    """
    print(f"--- Fetching market data for {ASSETS} from {start_date} to {end_date} ---")

    try:
        df_prices = yf_lib.download(ASSETS, start=start_date, end=end_date)['Close']
        # Keep only dates on which every asset has a close. Re-binding
        # instead of dropna(inplace=True) avoids mutating a column selection
        # of the downloaded frame.
        df_prices = df_prices.dropna()
    except Exception as e:
        print(f"❌ Error fetching asset prices: {e}")
        return None

    # A download that yields no rows does not necessarily raise, so check
    # explicitly before reporting success.
    if df_prices.empty:
        print("❌ No price rows were returned for the requested assets and date range.")
        return None
    print(f"✅ Fetched {len(ASSETS)} asset prices.")

    print("--- Fetching macroeconomic data from FRED ---")

    try:
        # Ask for an extra year of history so that every series already has
        # an observation to carry forward on the first price date.
        fred_start_date = (
            datetime.strptime(start_date, '%Y-%m-%d') - timedelta(days=365)
        ).strftime('%Y-%m-%d')

        df_fred = pdr.DataReader(list(FRED_IDS), 'fred', start=fred_start_date, end=end_date)
        df_fred = df_fred.rename(columns=FRED_IDS)
        print("✅ Fetched Federal Funds Rate, CPI, and VIX data from FRED.")
    except Exception as e:
        # Best effort: continue with prices only when FRED is unavailable.
        print(f"❌ Error fetching FRED data: {e}. Check FRED API access or ticker validity.")
        df_fred = pd.DataFrame()

    df_combined = df_prices.copy()

    if not df_fred.empty:
        # Forward-fill on FRED's own calendar BEFORE aligning to the price
        # dates. Merging first would discard the look-back history and any
        # observation dated on a non-trading day (e.g. a monthly value dated
        # on a weekend), leaving a stale value for that whole period.
        df_macro = df_fred.sort_index().ffill()
        df_macro = df_macro.reindex(df_prices.index, method='ffill')
        # Back-fill only covers a series with no observation at or before the
        # first price date; kept so such rows are retained as before.
        df_macro = df_macro.bfill()
        df_combined = df_combined.merge(df_macro, left_index=True, right_index=True, how='left')

    # Trim to the requested window and drop any row that is still incomplete
    # (this also removes every row if a macro series is entirely missing).
    df_combined = df_combined.loc[start_date:end_date]
    df_combined = df_combined.dropna()

    if df_combined.empty:
        print("❌ Final combined dataframe is empty after merging and cleaning. Check date ranges and data availability.")
        return None

    if filename:
        output_dir = os.path.dirname(filename)
        # An empty dirname means the file goes into the current directory.
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        df_combined.to_csv(filename, index=True)
        print(f"\n✅ Data saved successfully to {filename}")

    print(f"Final data shape: {df_combined.shape}")
    print("Columns:", df_combined.columns.tolist())

    return df_combined
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Command-line entry point: parse the date range and output path, then
    # run the fetch.
    parser = argparse.ArgumentParser(description="Fetch market and macroeconomic data.")
    parser.add_argument("--start", type=str, default="2015-01-01", help="Start date (YYYY-MM-DD).")
    parser.add_argument("--end", type=str, default="2020-12-31", help="End date (YYYY-MM-DD).")
    parser.add_argument("--filename", type=str, default="data/train.csv", help="Output CSV filename.")

    args = parser.parse_args()

    # Validate the dates before any network call. fetch_market_data parses
    # the start date with '%Y-%m-%d' only while fetching FRED data, so a
    # malformed value would otherwise be reported as a FRED error.
    parsed_dates = {}
    for flag, value in (("--start", args.start), ("--end", args.end)):
        try:
            parsed_dates[flag] = datetime.strptime(value, "%Y-%m-%d")
        except ValueError:
            # parser.error prints the usage message and exits with status 2.
            parser.error(f"{flag} must be a valid date in YYYY-MM-DD format, got {value!r}")

    if parsed_dates["--start"] > parsed_dates["--end"]:
        parser.error("--start must not be later than --end")

    fetch_market_data(args.start, args.end, args.filename)