# JayLacoma's picture
# Update geo_macro.py
# 87076e2 verified
# geo_macro.py
import os
import warnings
from datetime import datetime

import numpy as np
import pandas as pd
import requests
import yfinance as yf
# Silence all library warnings (pandas / yfinance are noisy during bulk downloads).
warnings.filterwarnings('ignore')
# ======================
# CONFIGURATION
# ======================
# FRED API key. Prefer supplying it through the FRED_API_KEY environment
# variable so the credential does not have to live in source control.
# NOTE(security): the literal fallback below is a credential that has been
# committed to the repository; it is kept only for backward compatibility and
# should be rotated and removed.
FRED_API_KEY = os.environ.get('FRED_API_KEY', '23f3511b0ca43918ccd503ef64cb844e')
# ======================
# UNIFIED DATA DOWNLOADER
# ======================
class UnifiedMarketDataDownloader:
    """Downloads all market data into a single DataFrame.

    Close prices come from Yahoo Finance (via ``yfinance``) and economic
    series from the FRED REST API. Everything is aligned on one daily
    calendar index and gap-filled (forward fill, then backward fill).

    Attributes
    ----------
    fred_api_key : str or None
        FRED API key; FRED downloads are skipped when it is falsy or equal to
        the placeholder ``'your_api_key_here'``.
    data_sources : dict[str, str]
        Output column name -> Yahoo Finance ticker. Several names may share
        one ticker (e.g. ``'TLT'`` and ``'US_Treasuries_Long'``); each name
        becomes its own column.
    fred_series : dict[str, str]
        Output column name -> FRED series id.
    """

    def __init__(self, fred_api_key=None):
        self.fred_api_key = fred_api_key
        # All market data sources
        self.data_sources = {
            # US Rates & Currencies
            'DGS10': '^TNX',
            # NOTE(review): '^FVX' is Yahoo's 5-year Treasury yield index, not
            # the 2-year yield the 'DGS2' label suggests. The key is left
            # unchanged so downstream column names stay stable.
            'DGS2': '^FVX',
            'DGS3MO': '^IRX',
            'DXY': 'DX-Y.NYB',
            'EURUSD': 'EURUSD=X',
            'JPYUSD': 'JPYUSD=X',
            # US Equity Indices
            'SP500': '^GSPC',
            'NASDAQ': '^IXIC',
            'RUSSELL': '^RUT',
            'DJI': '^DJI',
            'VIX': '^VIX',
            'VXN': '^VXN',
            # Commodities
            'Gold': 'GC=F',
            'Oil': 'CL=F',
            'Copper': 'HG=F',
            'Silver': 'SI=F',
            'NaturalGas': 'NG=F',
            # Credit & Fixed Income
            'HYG': 'HYG',
            'JNK': 'JNK',
            'LQD': 'LQD',
            'TIP': 'TIP',
            'TLT': 'TLT',
            # Global Markets
            'China': 'FXI',
            'China_Tech': 'KWEB',
            'Europe': 'FEZ',
            'Europe_Financials': 'EUFN',
            'Japan': 'EWJ',
            'South_Korea': 'EWY',
            'Taiwan': 'EWT',
            'India': 'INDA',
            'Brazil': 'EWZ',
            'Emerging_Markets': 'EEM',
            # Global Currencies
            'CNY': 'CNY=X',
            'JPY': 'JPY=X',
            'EUR': 'EUR=X',
            'GBP': 'GBP=X',
            # Geopolitical Indicators
            'Defense_Stocks': 'ITA',
            'Cybersecurity': 'HACK',
            'Energy_Security': 'XLE',
            'Gold_Safe_Haven': 'GLD',
            'US_Treasuries_Long': 'TLT',
            # Sectors
            'Technology': 'XLK',
            'Financials': 'XLF',
            'Healthcare': 'XLV',
            'Consumer_Discretionary': 'XLY',
            'Consumer_Staples': 'XLP',
            'Energy': 'XLE',
            'Materials': 'XLB',
            'Industrials': 'XLI',
            'Utilities': 'XLU',
            'Real_Estate': 'XLRE',
            'Communication_Services': 'XLC',
            # Sector Details
            'Regional_Banks': 'KRE',
            'Homebuilders': 'XHB',
            'Retail': 'XRT',
            'Transportation': 'XTN',
            'Semiconductors': 'SMH',
            'Clean_Energy': 'ICLN',
            'Aerospace_Defense': 'XAR',
            # Supply Chain & Logistics
            'Baltic_Dry_Index': 'BDRY',  # Shipping costs proxy
            'Logistics': 'XTN',
            # Credit Spreads & Risk
            'Investment_Grade_Spread': 'LQD',  # Already have, key for credit risk
            'Emerging_Market_Debt': 'EMB',
            'Muni_Bonds': 'MUB',
            # Inflation Breakevens
            'Inflation_Protected': 'TIP',  # Already have
            'Short_Term_Treasuries': 'SHY',
            'Intermediate_Treasuries': 'IEF',
            # Currency Volatility
            'USD_Emerging': 'UUP',  # USD strength
            'Gold_Miners': 'GDX',  # Gold mining companies (more volatile than GLD)
            # Economic Cycle Indicators
            'Small_Cap_Value': 'IWN',  # Early cycle indicator
            'High_Dividend': 'VYM',  # Late cycle/defensive
            'Growth_Stocks': 'VUG',  # Risk-on
            'Value_Stocks': 'VTV',  # Risk-off rotation
            # Liquidity & Credit Conditions
            'Mortgage_REITs': 'REM',  # Interest rate sensitivity
            'Preferred_Stock': 'PFF',  # Credit conditions
            # Global Safe Havens
            'Swiss_Franc': 'CHF=X',
            'Gold_Futures': 'GC=F',  # Already have as 'Gold'
            'Bitcoin': 'BTC-USD',  # Alternative safe haven / risk asset
            # Commodity Inflation
            'Agricultural': 'DBA',
            'Base_Metals': 'DBB',
            'Crude_Oil': 'USO',
            # Labor Market
            'Staffing': 'SIA',  # Staffing index (leading indicator)
            # Housing Market
            'Mortgage_Backed_Securities': 'MBB',
            'REITs': 'VNQ',
            # Consumer Health
            'Consumer_Discretionary_vs_Staples': 'XLY',  # Already have
            'Restaurants': 'EAT',  # Consumer spending proxy
            'Retail_Luxury': 'RL',  # High-end consumer
            # Tech Innovation Cycles
            'Cloud_Computing': 'SKYY',
            'Robotics_AI': 'BOTZ',
            'Fintech': 'FINX',
            # Geopolitical Tension Proxies
            'Uranium': 'URA',  # Nuclear/energy security
            'Rare_Earth': 'REMX',  # Supply chain geopolitics
            'Water': 'PHO',  # Resource scarcity
            # Sentiment & Positioning
            'Leveraged_Loans': 'BKLN',  # Credit appetite
            'TIPS_Spread': 'TIP',  # Inflation expectations
        }
        # FRED Economic Series
        self.fred_series = {
            # Labor Market
            'UNRATE': 'UNRATE',  # Unemployment Rate
            'PAYEMS': 'PAYEMS',  # Non-Farm Payrolls
            'ICSA': 'ICSA',  # Initial Jobless Claims
            'JTSJOL': 'JTSJOL',  # Job Openings (JOLTS)
            # Inflation
            'CPIAUCSL': 'CPIAUCSL',  # CPI
            'CPILFESL': 'CPILFESL',  # Core CPI
            'PPIACO': 'PPIACO',  # PPI
            'PCEPILFE': 'PCEPILFE',  # Core PCE (Fed's preferred)
            # Production & Manufacturing
            'INDPRO': 'INDPRO',  # Industrial Production
            'IPMAN': 'IPMAN',  # Manufacturing Production
            'TOTALSA': 'TOTALSA',  # Total Vehicle Sales
            'UMTMVS': 'UMTMVS',  # Manufacturing New Orders
            # Money & Credit
            'M2': 'M2SL',  # M2 Money Supply
            'WALCL': 'WALCL',  # Fed Balance Sheet
            'TOTCI': 'TOTCI',  # Commercial & Industrial Loans
            # Consumer
            'CONSUMER_SENTIMENT': 'UMCSENT',  # Consumer Sentiment
            'RSXFS': 'RSXFS',  # Retail Sales
            'PCE': 'PCE',  # Personal Consumption
            'PSAVERT': 'PSAVERT',  # Personal Saving Rate
            # Housing
            'HOUST': 'HOUST',  # Housing Starts
            'MORTGAGE30US': 'MORTGAGE30US',  # 30-Year Mortgage Rate
            'CSUSHPISA': 'CSUSHPISA',  # Case-Shiller Home Price Index
            # Trade & GDP
            'GDP': 'GDP',  # GDP
            'NETEXP': 'NETEXP',  # Net Exports
            'BOPGSTB': 'BOPGSTB',  # Trade Balance
            # Yield Curve & Credit
            'T10Y2Y': 'T10Y2Y',  # 10Y-2Y Yield Spread (recession indicator)
            'T10YIE': 'T10YIE',  # 10Y Breakeven Inflation Rate
            'BAMLH0A0HYM2': 'BAMLH0A0HYM2',  # High Yield Spread
            'DFII10': 'DFII10',  # 10-Year TIPS
            # Leading Indicators
            'USSLIND': 'USSLIND',  # Leading Index
            'DCOILWTICO': 'DCOILWTICO',  # WTI Crude Oil Price
            # Corporate & Business
            'CPROFIT': 'CPROFIT',  # Corporate Profits
            'BUSLOANS': 'BUSLOANS',  # Business Loans
        }

    def download_all_data(self, start_date='2018-01-01', end_date=None):
        """
        Download all market data and return a single unified DataFrame

        Parameters:
        -----------
        start_date : str
            Start date in 'YYYY-MM-DD' format
        end_date : str, optional
            End date in 'YYYY-MM-DD' format (defaults to today)

        Returns:
        --------
        pd.DataFrame
            Unified DataFrame with all market data, date-indexed
        """
        if end_date is None:
            end_date = datetime.now().strftime('%Y-%m-%d')
        print("=" * 80)
        print("🚀 UNIFIED MARKET DATA DOWNLOAD")
        print("=" * 80)
        print(f"📅 Period: {start_date} to {end_date}\n")
        # Step 1: Download all market data
        print("📊 Downloading Market Data...")
        market_data = self._download_market_data(start_date, end_date)
        # Step 2: Download FRED economic data
        if self.fred_api_key and self.fred_api_key != 'your_api_key_here':
            print("\n📈 Downloading Economic Data (FRED)...")
            economic_data = self._download_fred_data(start_date, end_date)
        else:
            print("\n⚠️ FRED API key not provided - skipping economic data")
            print(" Get your free API key at: https://fred.stlouisfed.org/docs/api/api_key.html")
            economic_data = {}
        # Step 3: Merge everything into single DataFrame
        print("\n🔗 Merging all data sources...")
        unified_df = self._merge_all_data(market_data, economic_data, start_date, end_date)

        # Summary statistics. Guard the empty cases (no rows or no columns) so
        # a fully failed download still returns a frame instead of raising on
        # a zero division or on NaT.strftime.
        n_rows, n_cols = unified_df.shape
        total_cells = n_rows * n_cols
        missing = int(unified_df.isnull().sum().sum())
        completeness = (1 - missing / total_cells) * 100 if total_cells else 0.0
        if n_rows:
            first_day = unified_df.index.min().strftime('%Y-%m-%d')
            last_day = unified_df.index.max().strftime('%Y-%m-%d')
        else:
            first_day = last_day = 'n/a'

        # Print summary
        print("\n" + "=" * 80)
        print("✅ DOWNLOAD COMPLETE")
        print("=" * 80)
        print(f"📊 Total Columns: {n_cols}")
        print(f"📅 Total Rows: {n_rows}")
        print(f"🗓️ Date Range: {first_day} to {last_day}")
        print(f"📉 Missing Values: {missing}")
        print(f"✨ Completeness: {completeness:.2f}%")
        print("\n📋 Column Preview:")
        print(unified_df.columns.tolist()[:10], "...\n")
        return unified_df

    def _label_close_prices(self, close_data):
        """Relabel ticker-named close columns with the configured series names.

        Columns that are entirely NaN (failed downloads) are dropped first.
        Every entry of ``self.data_sources`` whose ticker is present yields a
        column, so names that share a ticker each get their own copy, the
        same set of columns the per-ticker fallback produces.

        Parameters
        ----------
        close_data : pd.DataFrame
            Close prices with one column per Yahoo ticker.

        Returns
        -------
        pd.DataFrame
            Close prices with one column per configured name, in
            ``self.data_sources`` order, on the same index as ``close_data``.
        """
        close_data = close_data.dropna(axis=1, how='all')
        labelled = {
            name: close_data[ticker]
            for name, ticker in self.data_sources.items()
            if ticker in close_data.columns
        }
        return pd.DataFrame(labelled, index=close_data.index)

    def _download_market_data(self, start_date, end_date):
        """Download all market data from Yahoo Finance.

        Tries one batched request for every distinct ticker and falls back to
        per-ticker downloads if anything in the batch path raises.

        Returns
        -------
        pd.DataFrame
            Close prices, one column per name in ``self.data_sources``.
        """
        # Several names map to the same ticker; request each ticker only once.
        unique_tickers = list(dict.fromkeys(self.data_sources.values()))
        print(f" Downloading {len(unique_tickers)} tickers...")
        # Download all at once (faster)
        try:
            data = yf.download(unique_tickers, start=start_date, end=end_date,
                               progress=False, auto_adjust=True, threads=True)
            # Extract Close prices
            if isinstance(data.columns, pd.MultiIndex):
                close_data = data['Close']
            elif 'Close' in data.columns:
                close_data = data[['Close']]
                # Flat columns mean a single-ticker result: label the column
                # with that ticker so it can be mapped to its name(s).
                if len(unique_tickers) == 1:
                    close_data = close_data.rename(columns={'Close': unique_tickers[0]})
            else:
                close_data = data
            # Drop failed (all-NaN) columns and rename tickers to our names
            close_data = self._label_close_prices(close_data)
            print(f" ✅ Successfully downloaded {len(close_data.columns)} series")
            return close_data
        except Exception as e:
            # Best-effort boundary: any batch failure triggers the fallback.
            print(f" ⚠️ Batch download failed: {e}")
            print(" Trying individual downloads...")
            return self._download_individual(start_date, end_date)

    def _download_individual(self, start_date, end_date):
        """Fallback: download tickers individually.

        Each successfully downloaded ticker is fetched once and reused for
        every name that maps to it; failed tickers are retried for later
        aliases. Series with 10 or fewer rows are skipped.

        Returns
        -------
        pd.DataFrame
            Close prices, one column per name that could be downloaded.
        """
        data_dict = {}
        fetched = {}  # ticker -> close Series, successful downloads only
        for name, ticker in self.data_sources.items():
            series = fetched.get(ticker)
            if series is None:
                try:
                    df = yf.download(ticker, start=start_date, end=end_date,
                                     progress=False, auto_adjust=True)
                    if df.empty or 'Close' not in df.columns:
                        continue
                    series = df['Close'].squeeze()
                    if isinstance(series, pd.DataFrame):
                        series = series.iloc[:, 0]
                    if not (isinstance(series, pd.Series) and len(series) > 10):
                        continue
                    fetched[ticker] = series
                except Exception as e:
                    print(f" ❌ {name}: {str(e)[:50]}")
                    continue
            data_dict[name] = series
            print(f" ✅ {name}")
        return pd.DataFrame(data_dict)

    def _download_fred_data(self, start_date, end_date):
        """Download economic data from FRED.

        Issues one ``series/observations`` request per entry of
        ``self.fred_series``. Non-numeric values are coerced to NaN; series
        with 10 or fewer observations are skipped. Failures are reported and
        skipped rather than raised.

        Returns
        -------
        dict[str, pd.Series]
            Name -> date-indexed series of observation values.
        """
        economic_data = {}
        url = "https://api.stlouisfed.org/fred/series/observations"
        # One session for all requests so the HTTPS connection is reused.
        with requests.Session() as session:
            for name, series_id in self.fred_series.items():
                try:
                    params = {
                        'series_id': series_id,
                        'api_key': self.fred_api_key,
                        'file_type': 'json',
                        'observation_start': start_date,
                        'observation_end': end_date,
                    }
                    response = session.get(url, params=params, timeout=30)
                    if response.status_code != 200:
                        print(f" ❌ {name}: HTTP {response.status_code}")
                        continue
                    data = response.json()
                    if 'observations' in data and data['observations']:
                        df = pd.DataFrame(data['observations'])
                        df['value'] = pd.to_numeric(df['value'], errors='coerce')
                        df['date'] = pd.to_datetime(df['date'])
                        series = df.set_index('date')['value']
                        if len(series) > 10:
                            economic_data[name] = series
                            print(f" ✅ {name}")
                except Exception as e:
                    # Best-effort: one bad series must not abort the rest.
                    print(f" ❌ {name}: {str(e)[:50]}")
                    continue
        return economic_data

    def _merge_all_data(self, market_data, economic_data, start_date, end_date):
        """Merge all data sources into single DataFrame with ffill + bfill.

        Parameters
        ----------
        market_data : pd.DataFrame
            Date-indexed market columns.
        economic_data : dict[str, pd.Series]
            Name -> date-indexed series; a name that also exists in
            ``market_data`` replaces the market column.
        start_date, end_date : str
            Inclusive bounds of the daily calendar index.

        Returns
        -------
        pd.DataFrame
            Daily frame (weekends/holidays included) with gaps filled.
            Note that the backward fill copies the first available value into
            earlier dates, i.e. leading gaps are filled with later data.
        """
        # Create a full daily date range (including weekends/holidays)
        date_range = pd.date_range(start=start_date, end=end_date, freq='D')
        # Collect every column first and build the frame in one step; inserting
        # 100+ columns one at a time fragments the DataFrame.
        columns = {col: market_data[col].reindex(date_range) for col in market_data.columns}
        # Add FRED economic data
        for name, series in economic_data.items():
            columns[name] = series.reindex(date_range)
        unified = pd.DataFrame(columns, index=date_range)
        # Forward-fill, then backward-fill to handle leading/trailing NaNs
        unified = unified.ffill().bfill()
        # Remove any columns that are still entirely NaN (e.g., failed downloads)
        unified = unified.dropna(axis=1, how='all')
        return unified
# NOTE: the usage example below is wrapped in a triple-quoted string, so at
# import time it is only a no-op string expression and never executes.
# Remove the surrounding quotes (and restore indentation) to run it as a script.
'''
# ======================
# USAGE EXAMPLE
# ======================
if __name__ == "__main__":
# Initialize downloader
downloader = UnifiedMarketDataDownloader(fred_api_key=FRED_API_KEY)
# Download all data
raw_market_data = downloader.download_all_data(
start_date='2018-01-01',
end_date=None # defaults to today
)
# Save to CSV (optional)
# raw_market_data.to_csv('unified_market_data.csv')
# print("\n💾 Saved to: unified_market_data.csv")
'''