# geo_macro.py
"""Unified downloader: market data (Yahoo Finance) + economic data (FRED).

Produces a single daily date-indexed DataFrame combining ~100 market series
and ~30 FRED economic series, forward/backward-filled over a full calendar
date range.
"""

import warnings
from datetime import datetime

import numpy as np
import pandas as pd

warnings.filterwarnings('ignore')

# ======================
# CONFIGURATION
# ======================
# Add your FRED API key here
# NOTE(review): a live API key is hard-coded; move it to an environment
# variable or config file before committing/sharing this module.
FRED_API_KEY = '23f3511b0ca43918ccd503ef64cb844e'


# ======================
# UNIFIED DATA DOWNLOADER
# ======================
class UnifiedMarketDataDownloader:
    """Downloads all market data into a single DataFrame."""

    def __init__(self, fred_api_key=None):
        """
        Parameters
        ----------
        fred_api_key : str, optional
            FRED API key. When missing (or left as the 'your_api_key_here'
            placeholder), the FRED step is skipped entirely.
        """
        self.fred_api_key = fred_api_key

        # All market data sources: friendly column name -> Yahoo Finance ticker.
        # NOTE(review): several names intentionally alias the same ticker
        # (e.g. 'Gold'/'Gold_Futures' -> 'GC=F', 'TIP' appears three times);
        # see _download_market_data for how duplicates are resolved.
        self.data_sources = {
            # US Rates & Currencies
            'DGS10': '^TNX',
            'DGS2': '^FVX',
            'DGS3MO': '^IRX',
            'DXY': 'DX-Y.NYB',
            'EURUSD': 'EURUSD=X',
            'JPYUSD': 'JPYUSD=X',
            # US Equity Indices
            'SP500': '^GSPC',
            'NASDAQ': '^IXIC',
            'RUSSELL': '^RUT',
            'DJI': '^DJI',
            'VIX': '^VIX',
            'VXN': '^VXN',
            # Commodities
            'Gold': 'GC=F',
            'Oil': 'CL=F',
            'Copper': 'HG=F',
            'Silver': 'SI=F',
            'NaturalGas': 'NG=F',
            # Credit & Fixed Income
            'HYG': 'HYG',
            'JNK': 'JNK',
            'LQD': 'LQD',
            'TIP': 'TIP',
            'TLT': 'TLT',
            # Global Markets
            'China': 'FXI',
            'China_Tech': 'KWEB',
            'Europe': 'FEZ',
            'Europe_Financials': 'EUFN',
            'Japan': 'EWJ',
            'South_Korea': 'EWY',
            'Taiwan': 'EWT',
            'India': 'INDA',
            'Brazil': 'EWZ',
            'Emerging_Markets': 'EEM',
            # Global Currencies
            'CNY': 'CNY=X',
            'JPY': 'JPY=X',
            'EUR': 'EUR=X',
            'GBP': 'GBP=X',
            # Geopolitical Indicators
            'Defense_Stocks': 'ITA',
            'Cybersecurity': 'HACK',
            'Energy_Security': 'XLE',
            'Gold_Safe_Haven': 'GLD',
            'US_Treasuries_Long': 'TLT',
            # Sectors
            'Technology': 'XLK',
            'Financials': 'XLF',
            'Healthcare': 'XLV',
            'Consumer_Discretionary': 'XLY',
            'Consumer_Staples': 'XLP',
            'Energy': 'XLE',
            'Materials': 'XLB',
            'Industrials': 'XLI',
            'Utilities': 'XLU',
            'Real_Estate': 'XLRE',
            'Communication_Services': 'XLC',
            # Sector Details
            'Regional_Banks': 'KRE',
            'Homebuilders': 'XHB',
            'Retail': 'XRT',
            'Transportation': 'XTN',
            'Semiconductors': 'SMH',
            'Clean_Energy': 'ICLN',
            'Aerospace_Defense': 'XAR',
            # Supply Chain & Logistics
            'Baltic_Dry_Index': 'BDRY',  # Shipping costs proxy
            'Logistics': 'XTN',
            # Credit Spreads & Risk
            'Investment_Grade_Spread': 'LQD',  # Already have, key for credit risk
            'Emerging_Market_Debt': 'EMB',
            'Muni_Bonds': 'MUB',
            # Inflation Breakevens
            'Inflation_Protected': 'TIP',  # Already have
            'Short_Term_Treasuries': 'SHY',
            'Intermediate_Treasuries': 'IEF',
            # Currency Volatility
            'USD_Emerging': 'UUP',  # USD strength
            'Gold_Miners': 'GDX',  # Gold mining companies (more volatile than GLD)
            # Economic Cycle Indicators
            'Small_Cap_Value': 'IWN',  # Early cycle indicator
            'High_Dividend': 'VYM',  # Late cycle/defensive
            'Growth_Stocks': 'VUG',  # Risk-on
            'Value_Stocks': 'VTV',  # Risk-off rotation
            # Liquidity & Credit Conditions
            'Mortgage_REITs': 'REM',  # Interest rate sensitivity
            'Preferred_Stock': 'PFF',  # Credit conditions
            # Global Safe Havens
            'Swiss_Franc': 'CHF=X',
            'Gold_Futures': 'GC=F',  # Already have as 'Gold'
            'Bitcoin': 'BTC-USD',  # Alternative safe haven / risk asset
            # Commodity Inflation
            'Agricultural': 'DBA',
            'Base_Metals': 'DBB',
            'Crude_Oil': 'USO',
            # Labor Market
            'Staffing': 'SIA',  # Staffing index (leading indicator)
            # Housing Market
            'Mortgage_Backed_Securities': 'MBB',
            'REITs': 'VNQ',
            # Consumer Health
            'Consumer_Discretionary_vs_Staples': 'XLY',  # Already have
            'Restaurants': 'EAT',  # Consumer spending proxy
            'Retail_Luxury': 'RL',  # High-end consumer
            # Tech Innovation Cycles
            'Cloud_Computing': 'SKYY',
            'Robotics_AI': 'BOTZ',
            'Fintech': 'FINX',
            # Geopolitical Tension Proxies
            'Uranium': 'URA',  # Nuclear/energy security
            'Rare_Earth': 'REMX',  # Supply chain geopolitics
            'Water': 'PHO',  # Resource scarcity
            # Sentiment & Positioning
            'Leveraged_Loans': 'BKLN',  # Credit appetite
            'TIPS_Spread': 'TIP',  # Inflation expectations
        }

        # FRED Economic Series: output column name -> FRED series id.
        self.fred_series = {
            # Labor Market
            'UNRATE': 'UNRATE',  # Unemployment Rate
            'PAYEMS': 'PAYEMS',  # Non-Farm Payrolls
            'ICSA': 'ICSA',  # Initial Jobless Claims
            'JTSJOL': 'JTSJOL',  # Job Openings (JOLTS)
            # Inflation
            'CPIAUCSL': 'CPIAUCSL',  # CPI
            'CPILFESL': 'CPILFESL',  # Core CPI
            'PPIACO': 'PPIACO',  # PPI
            'PCEPILFE': 'PCEPILFE',  # Core PCE (Fed's preferred)
            # Production & Manufacturing
            'INDPRO': 'INDPRO',  # Industrial Production
            'IPMAN': 'IPMAN',  # Manufacturing Production
            'TOTALSA': 'TOTALSA',  # Total Vehicle Sales
            'UMTMVS': 'UMTMVS',  # Manufacturing New Orders
            # Money & Credit
            'M2': 'M2SL',  # M2 Money Supply
            'WALCL': 'WALCL',  # Fed Balance Sheet
            'TOTCI': 'TOTCI',  # Commercial & Industrial Loans
            # Consumer
            'CONSUMER_SENTIMENT': 'UMCSENT',  # Consumer Sentiment
            'RSXFS': 'RSXFS',  # Retail Sales
            'PCE': 'PCE',  # Personal Consumption
            'PSAVERT': 'PSAVERT',  # Personal Saving Rate
            # Housing
            'HOUST': 'HOUST',  # Housing Starts
            'MORTGAGE30US': 'MORTGAGE30US',  # 30-Year Mortgage Rate
            'CSUSHPISA': 'CSUSHPISA',  # Case-Shiller Home Price Index
            # Trade & GDP
            'GDP': 'GDP',  # GDP
            'NETEXP': 'NETEXP',  # Net Exports
            'BOPGSTB': 'BOPGSTB',  # Trade Balance
            # Yield Curve & Credit
            'T10Y2Y': 'T10Y2Y',  # 10Y-2Y Yield Spread (recession indicator)
            'T10YIE': 'T10YIE',  # 10Y Breakeven Inflation Rate
            'BAMLH0A0HYM2': 'BAMLH0A0HYM2',  # High Yield Spread
            'DFII10': 'DFII10',  # 10-Year TIPS
            # Leading Indicators
            'USSLIND': 'USSLIND',  # Leading Index
            'DCOILWTICO': 'DCOILWTICO',  # WTI Crude Oil Price
            # Corporate & Business
            'CPROFIT': 'CPROFIT',  # Corporate Profits
            'BUSLOANS': 'BUSLOANS',  # Business Loans
        }

    def download_all_data(self, start_date='2018-01-01', end_date=None):
        """
        Download all market data and return a single unified DataFrame

        Parameters:
        -----------
        start_date : str
            Start date in 'YYYY-MM-DD' format
        end_date : str, optional
            End date in 'YYYY-MM-DD' format (defaults to today)

        Returns:
        --------
        pd.DataFrame
            Unified DataFrame with all market data, date-indexed
        """
        if end_date is None:
            end_date = datetime.now().strftime('%Y-%m-%d')

        print("=" * 80)
        print("šŸš€ UNIFIED MARKET DATA DOWNLOAD")
        print("=" * 80)
        print(f"šŸ“… Period: {start_date} to {end_date}\n")

        # Step 1: Download all market data
        print("šŸ“Š Downloading Market Data...")
        market_data = self._download_market_data(start_date, end_date)

        # Step 2: Download FRED economic data (skipped without a real key)
        if self.fred_api_key and self.fred_api_key != 'your_api_key_here':
            print("\nšŸ“ˆ Downloading Economic Data (FRED)...")
            economic_data = self._download_fred_data(start_date, end_date)
        else:
            print("\nāš ļø FRED API key not provided - skipping economic data")
            print(" Get your free API key at: https://fred.stlouisfed.org/docs/api/api_key.html")
            economic_data = {}

        # Step 3: Merge everything into single DataFrame
        print("\nšŸ”— Merging all data sources...")
        unified_df = self._merge_all_data(market_data, economic_data, start_date, end_date)

        # Print summary
        print("\n" + "=" * 80)
        print("āœ… DOWNLOAD COMPLETE")
        print("=" * 80)
        print(f"šŸ“Š Total Columns: {unified_df.shape[1]}")
        print(f"šŸ“… Total Rows: {unified_df.shape[0]}")
        print(f"šŸ—“ļø Date Range: {unified_df.index.min().strftime('%Y-%m-%d')} to {unified_df.index.max().strftime('%Y-%m-%d')}")
        print(f"šŸ“‰ Missing Values: {unified_df.isnull().sum().sum()}")
        print(f"✨ Completeness: {(1 - unified_df.isnull().sum().sum()/(unified_df.shape[0]*unified_df.shape[1]))*100:.2f}%")
        print("\nšŸ“‹ Column Preview:")
        print(unified_df.columns.tolist()[:10], "...\n")

        return unified_df

    def _download_market_data(self, start_date, end_date):
        """Download all market data from Yahoo Finance in one batch.

        Falls back to per-ticker downloads if the batch request fails.
        """
        # Local import so the module (and the pure merge logic) can be used
        # without yfinance installed.
        import yfinance as yf

        all_tickers = list(self.data_sources.values())
        print(f" Downloading {len(all_tickers)} tickers...")

        # Download all at once (faster)
        try:
            data = yf.download(all_tickers, start=start_date, end=end_date,
                               progress=False, auto_adjust=True, threads=True)

            # Extract Close prices (multi-ticker results use a MultiIndex)
            if isinstance(data.columns, pd.MultiIndex):
                close_data = data['Close']
            else:
                close_data = data[['Close']] if 'Close' in data.columns else data

            # Rename ticker columns to our friendly names.
            # NOTE(review): the reverse map keeps only the LAST name defined
            # for a duplicated ticker (e.g. 'GC=F' ends up as 'Gold_Futures',
            # not 'Gold'), so earlier aliases are silently dropped.
            ticker_to_name = {v: k for k, v in self.data_sources.items()}
            close_data.columns = [ticker_to_name.get(col, col) for col in close_data.columns]

            # BUG FIX: the original did `close_data(axis=1, how='all')`,
            # which calls the DataFrame (TypeError) and therefore ALWAYS fell
            # into the except-branch fallback. Use dropna to remove columns
            # that failed to download.
            close_data = close_data.dropna(axis=1, how='all')

            print(f" āœ… Successfully downloaded {len(close_data.columns)} series")
            return close_data
        except Exception as e:
            print(f" āš ļø Batch download failed: {e}")
            print(" Trying individual downloads...")
            return self._download_individual(start_date, end_date)

    def _download_individual(self, start_date, end_date):
        """Fallback: download tickers individually."""
        import yfinance as yf  # local import; see _download_market_data

        data_dict = {}
        for name, ticker in self.data_sources.items():
            try:
                df = yf.download(ticker, start=start_date, end=end_date,
                                 progress=False, auto_adjust=True)
                if not df.empty and 'Close' in df.columns:
                    series = df['Close'].squeeze()
                    # squeeze() may still return a 1-column frame; take it.
                    if isinstance(series, pd.DataFrame):
                        series = series.iloc[:, 0]
                    # Keep only series with a usable amount of history.
                    if isinstance(series, pd.Series) and len(series) > 10:
                        data_dict[name] = series
                        print(f" āœ… {name}")
            except Exception as e:
                print(f" āŒ {name}: {str(e)[:50]}")
                continue
        return pd.DataFrame(data_dict)

    def _download_fred_data(self, start_date, end_date):
        """Download economic data from FRED.

        Returns a dict of {name: pd.Series} (date-indexed floats).
        """
        import requests  # local import; see _download_market_data

        economic_data = {}
        for name, series_id in self.fred_series.items():
            try:
                url = "https://api.stlouisfed.org/fred/series/observations"
                params = {
                    'series_id': series_id,
                    'api_key': self.fred_api_key,
                    'file_type': 'json',
                    'observation_start': start_date,
                    'observation_end': end_date,
                }
                response = requests.get(url, params=params, timeout=30)
                if response.status_code == 200:
                    data = response.json()
                    if 'observations' in data and data['observations']:
                        df = pd.DataFrame(data['observations'])
                        # FRED uses '.' for missing values; coerce to NaN.
                        df['value'] = pd.to_numeric(df['value'], errors='coerce')
                        df['date'] = pd.to_datetime(df['date'])
                        series = df.set_index('date')['value']
                        if len(series) > 10:
                            economic_data[name] = series
                            print(f" āœ… {name}")
                else:
                    print(f" āŒ {name}: HTTP {response.status_code}")
            except Exception as e:
                print(f" āŒ {name}: {str(e)[:50]}")
                continue
        return economic_data

    def _merge_all_data(self, market_data, economic_data, start_date, end_date):
        """Merge all data sources into single DataFrame with ffill + bfill.

        Parameters
        ----------
        market_data : pd.DataFrame
            Date-indexed market prices (from yfinance).
        economic_data : dict[str, pd.Series]
            Date-indexed FRED series.
        start_date, end_date : str
            'YYYY-MM-DD' bounds of the output calendar range.
        """
        # Create a full daily date range (including weekends/holidays)
        date_range = pd.date_range(start=start_date, end=end_date, freq='D')

        # Initialize unified DataFrame with full date index
        unified = pd.DataFrame(index=date_range)

        # Add market data (already date-indexed from yfinance)
        for col in market_data.columns:
            unified[col] = market_data[col].reindex(date_range)

        # Add FRED economic data
        for name, series in economic_data.items():
            unified[name] = series.reindex(date_range)

        # Forward-fill, then backward-fill to handle leading/trailing NaNs
        unified = unified.ffill().bfill()

        # Remove any columns that are still entirely NaN (e.g., failed downloads)
        unified = unified.dropna(axis=1, how='all')

        return unified


'''
# ======================
# USAGE EXAMPLE
# ======================
if __name__ == "__main__":
    # Initialize downloader
    downloader = UnifiedMarketDataDownloader(fred_api_key=FRED_API_KEY)

    # Download all data
    raw_market_data = downloader.download_all_data(
        start_date='2018-01-01',
        end_date=None  # defaults to today
    )

    # Save to CSV (optional)
    # raw_market_data.to_csv('unified_market_data.csv')
    # print("\nšŸ’¾ Saved to: unified_market_data.csv")
'''