Spaces:
Sleeping
Sleeping
# geo_macro.py
import os
import warnings
from datetime import datetime

import numpy as np
import pandas as pd
import requests
import yfinance as yf
# NOTE(review): this silences every warning for the whole process as soon as
# the module is imported, including deprecation notices from pandas/yfinance.
warnings.filterwarnings('ignore')

# ======================
# CONFIGURATION
# ======================
# FRED API key. Set the FRED_API_KEY environment variable to supply your own
# key without editing this file; the literal below is only a fallback so that
# existing deployments keep working.
# SECURITY(review): the fallback looks like a live key committed to source
# control. It should be rotated and replaced with 'your_api_key_here'.
FRED_API_KEY = os.environ.get('FRED_API_KEY', '23f3511b0ca43918ccd503ef64cb844e')
# ======================
# UNIFIED DATA DOWNLOADER
# ======================
class UnifiedMarketDataDownloader:
    """Download Yahoo Finance and FRED series into one date-indexed DataFrame.

    ``data_sources`` maps output column names to Yahoo Finance tickers and
    ``fred_series`` maps output column names to FRED series ids. Several
    column names share one ticker (e.g. ``TLT`` and ``US_Treasuries_Long``);
    every name gets its own column in the result.
    """

    def __init__(self, fred_api_key=None):
        """Store the FRED key and build the ticker / series lookup tables.

        Parameters
        ----------
        fred_api_key : str, optional
            FRED API key. When it is falsy or equal to
            ``'your_api_key_here'`` the FRED download step is skipped.
        """
        self.fred_api_key = fred_api_key

        # All market data sources: output column name -> Yahoo Finance ticker
        self.data_sources = {
            # US Rates & Currencies
            'DGS10': '^TNX',
            # NOTE(review): ^FVX is the 5-year Treasury yield index, not the
            # 2-year; the key is kept because callers may rely on the column.
            'DGS2': '^FVX',
            'DGS3MO': '^IRX',
            'DXY': 'DX-Y.NYB',
            'EURUSD': 'EURUSD=X',
            'JPYUSD': 'JPYUSD=X',
            # US Equity Indices
            'SP500': '^GSPC',
            'NASDAQ': '^IXIC',
            'RUSSELL': '^RUT',
            'DJI': '^DJI',
            'VIX': '^VIX',
            'VXN': '^VXN',
            # Commodities
            'Gold': 'GC=F',
            'Oil': 'CL=F',
            'Copper': 'HG=F',
            'Silver': 'SI=F',
            'NaturalGas': 'NG=F',
            # Credit & Fixed Income
            'HYG': 'HYG',
            'JNK': 'JNK',
            'LQD': 'LQD',
            'TIP': 'TIP',
            'TLT': 'TLT',
            # Global Markets
            'China': 'FXI',
            'China_Tech': 'KWEB',
            'Europe': 'FEZ',
            'Europe_Financials': 'EUFN',
            'Japan': 'EWJ',
            'South_Korea': 'EWY',
            'Taiwan': 'EWT',
            'India': 'INDA',
            'Brazil': 'EWZ',
            'Emerging_Markets': 'EEM',
            # Global Currencies
            'CNY': 'CNY=X',
            'JPY': 'JPY=X',
            'EUR': 'EUR=X',
            'GBP': 'GBP=X',
            # Geopolitical Indicators
            'Defense_Stocks': 'ITA',
            'Cybersecurity': 'HACK',
            'Energy_Security': 'XLE',
            'Gold_Safe_Haven': 'GLD',
            'US_Treasuries_Long': 'TLT',
            # Sectors
            'Technology': 'XLK',
            'Financials': 'XLF',
            'Healthcare': 'XLV',
            'Consumer_Discretionary': 'XLY',
            'Consumer_Staples': 'XLP',
            'Energy': 'XLE',
            'Materials': 'XLB',
            'Industrials': 'XLI',
            'Utilities': 'XLU',
            'Real_Estate': 'XLRE',
            'Communication_Services': 'XLC',
            # Sector Details
            'Regional_Banks': 'KRE',
            'Homebuilders': 'XHB',
            'Retail': 'XRT',
            'Transportation': 'XTN',
            'Semiconductors': 'SMH',
            'Clean_Energy': 'ICLN',
            'Aerospace_Defense': 'XAR',
            # Supply Chain & Logistics
            'Baltic_Dry_Index': 'BDRY',  # Shipping costs proxy
            'Logistics': 'XTN',
            # Credit Spreads & Risk
            'Investment_Grade_Spread': 'LQD',  # Already have, key for credit risk
            'Emerging_Market_Debt': 'EMB',
            'Muni_Bonds': 'MUB',
            # Inflation Breakevens
            'Inflation_Protected': 'TIP',  # Already have
            'Short_Term_Treasuries': 'SHY',
            'Intermediate_Treasuries': 'IEF',
            # Currency Volatility
            'USD_Emerging': 'UUP',  # USD strength
            'Gold_Miners': 'GDX',  # Gold mining companies (more volatile than GLD)
            # Economic Cycle Indicators
            'Small_Cap_Value': 'IWN',  # Early cycle indicator
            'High_Dividend': 'VYM',  # Late cycle/defensive
            'Growth_Stocks': 'VUG',  # Risk-on
            'Value_Stocks': 'VTV',  # Risk-off rotation
            # Liquidity & Credit Conditions
            'Mortgage_REITs': 'REM',  # Interest rate sensitivity
            'Preferred_Stock': 'PFF',  # Credit conditions
            # Global Safe Havens
            'Swiss_Franc': 'CHF=X',
            'Gold_Futures': 'GC=F',  # Already have as 'Gold'
            'Bitcoin': 'BTC-USD',  # Alternative safe haven / risk asset
            # Commodity Inflation
            'Agricultural': 'DBA',
            'Base_Metals': 'DBB',
            'Crude_Oil': 'USO',
            # Labor Market
            # NOTE(review): 'SIA' may not resolve to a staffing index on
            # Yahoo Finance - verify; a failed ticker is dropped downstream.
            'Staffing': 'SIA',  # Staffing index (leading indicator)
            # Housing Market
            'Mortgage_Backed_Securities': 'MBB',
            'REITs': 'VNQ',
            # Consumer Health
            'Consumer_Discretionary_vs_Staples': 'XLY',  # Already have
            'Restaurants': 'EAT',  # Consumer spending proxy
            'Retail_Luxury': 'RL',  # High-end consumer
            # Tech Innovation Cycles
            'Cloud_Computing': 'SKYY',
            'Robotics_AI': 'BOTZ',
            'Fintech': 'FINX',
            # Geopolitical Tension Proxies
            'Uranium': 'URA',  # Nuclear/energy security
            'Rare_Earth': 'REMX',  # Supply chain geopolitics
            'Water': 'PHO',  # Resource scarcity
            # Sentiment & Positioning
            'Leveraged_Loans': 'BKLN',  # Credit appetite
            'TIPS_Spread': 'TIP',  # Inflation expectations
        }

        # FRED Economic Series: output column name -> FRED series id
        self.fred_series = {
            # Labor Market
            'UNRATE': 'UNRATE',  # Unemployment Rate
            'PAYEMS': 'PAYEMS',  # Non-Farm Payrolls
            'ICSA': 'ICSA',  # Initial Jobless Claims
            'JTSJOL': 'JTSJOL',  # Job Openings (JOLTS)
            # Inflation
            'CPIAUCSL': 'CPIAUCSL',  # CPI
            'CPILFESL': 'CPILFESL',  # Core CPI
            'PPIACO': 'PPIACO',  # PPI
            'PCEPILFE': 'PCEPILFE',  # Core PCE (Fed's preferred)
            # Production & Manufacturing
            'INDPRO': 'INDPRO',  # Industrial Production
            'IPMAN': 'IPMAN',  # Manufacturing Production
            'TOTALSA': 'TOTALSA',  # Total Vehicle Sales
            'UMTMVS': 'UMTMVS',  # Manufacturing New Orders
            # Money & Credit
            'M2': 'M2SL',  # M2 Money Supply
            'WALCL': 'WALCL',  # Fed Balance Sheet
            'TOTCI': 'TOTCI',  # Commercial & Industrial Loans
            # Consumer
            'CONSUMER_SENTIMENT': 'UMCSENT',  # Consumer Sentiment
            'RSXFS': 'RSXFS',  # Retail Sales
            'PCE': 'PCE',  # Personal Consumption
            'PSAVERT': 'PSAVERT',  # Personal Saving Rate
            # Housing
            'HOUST': 'HOUST',  # Housing Starts
            'MORTGAGE30US': 'MORTGAGE30US',  # 30-Year Mortgage Rate
            'CSUSHPISA': 'CSUSHPISA',  # Case-Shiller Home Price Index
            # Trade & GDP
            'GDP': 'GDP',  # GDP
            'NETEXP': 'NETEXP',  # Net Exports
            'BOPGSTB': 'BOPGSTB',  # Trade Balance
            # Yield Curve & Credit
            'T10Y2Y': 'T10Y2Y',  # 10Y-2Y Yield Spread (recession indicator)
            'T10YIE': 'T10YIE',  # 10Y Breakeven Inflation Rate
            'BAMLH0A0HYM2': 'BAMLH0A0HYM2',  # High Yield Spread
            'DFII10': 'DFII10',  # 10-Year TIPS
            # Leading Indicators
            'USSLIND': 'USSLIND',  # Leading Index
            'DCOILWTICO': 'DCOILWTICO',  # WTI Crude Oil Price
            # Corporate & Business
            'CPROFIT': 'CPROFIT',  # Corporate Profits
            'BUSLOANS': 'BUSLOANS',  # Business Loans
        }

    def download_all_data(self, start_date='2018-01-01', end_date=None):
        """Download all market data and return a single unified DataFrame.

        Parameters
        ----------
        start_date : str
            Start date in 'YYYY-MM-DD' format.
        end_date : str, optional
            End date in 'YYYY-MM-DD' format (defaults to today).

        Returns
        -------
        pd.DataFrame
            Unified DataFrame with all market and economic series on a daily
            date index, forward- then backward-filled.
        """
        if end_date is None:
            end_date = datetime.now().strftime('%Y-%m-%d')

        print("=" * 80)
        print("🚀 UNIFIED MARKET DATA DOWNLOAD")
        print("=" * 80)
        print(f"📅 Period: {start_date} to {end_date}\n")

        # Step 1: Download all market data
        print("📊 Downloading Market Data...")
        market_data = self._download_market_data(start_date, end_date)

        # Step 2: Download FRED economic data
        if self.fred_api_key and self.fred_api_key != 'your_api_key_here':
            print("\n📈 Downloading Economic Data (FRED)...")
            economic_data = self._download_fred_data(start_date, end_date)
        else:
            print("\n⚠️ FRED API key not provided - skipping economic data")
            print(" Get your free API key at: https://fred.stlouisfed.org/docs/api/api_key.html")
            economic_data = {}

        # Step 3: Merge everything into single DataFrame
        print("\n🔗 Merging all data sources...")
        unified_df = self._merge_all_data(market_data, economic_data, start_date, end_date)

        # Summary. Guard the empty cases so that a run in which every download
        # failed still returns its (empty) frame instead of crashing here.
        n_rows, n_cols = unified_df.shape
        missing = int(unified_df.isnull().sum().sum())
        total_cells = n_rows * n_cols
        completeness = (1 - missing / total_cells) * 100 if total_cells else 0.0

        print("\n" + "=" * 80)
        print("✅ DOWNLOAD COMPLETE")
        print("=" * 80)
        print(f"📊 Total Columns: {n_cols}")
        print(f"📅 Total Rows: {n_rows}")
        if n_rows:
            first_day = unified_df.index.min().strftime('%Y-%m-%d')
            last_day = unified_df.index.max().strftime('%Y-%m-%d')
            print(f"🗓️ Date Range: {first_day} to {last_day}")
        else:
            print("🗓️ Date Range: n/a (no rows)")
        print(f"📉 Missing Values: {missing}")
        print(f"✨ Completeness: {completeness:.2f}%")
        print("\n📋 Column Preview:")
        print(unified_df.columns.tolist()[:10], "...\n")
        return unified_df

    def _download_market_data(self, start_date, end_date):
        """Download Close prices for every ticker in one batched request.

        Falls back to :meth:`_download_individual` when the batch request
        raises or yields no usable data.

        Returns
        -------
        pd.DataFrame
            One column per ``data_sources`` name whose ticker returned data.
        """
        # Several names share a ticker; request each ticker only once.
        all_tickers = list(dict.fromkeys(self.data_sources.values()))
        print(f" Downloading {len(all_tickers)} tickers...")

        # Download all at once (faster)
        try:
            data = yf.download(all_tickers, start=start_date, end=end_date,
                               progress=False, auto_adjust=True, threads=True)
            close_data = self._close_frame_from_batch(data, all_tickers)
            if close_data.empty:
                raise ValueError("no usable Close data returned")
        except Exception as e:  # best-effort: any failure triggers the fallback
            print(f" ⚠️ Batch download failed: {e}")
            print(" Trying individual downloads...")
            return self._download_individual(start_date, end_date)

        print(f" ✅ Successfully downloaded {len(close_data.columns)} series")
        return close_data

    def _close_frame_from_batch(self, data, tickers):
        """Convert a raw batch download into a name-keyed Close price frame.

        Parameters
        ----------
        data : pd.DataFrame
            Result of the batched ``yf.download`` call.
        tickers : list of str
            The tickers that were requested, in request order.

        Returns
        -------
        pd.DataFrame
            One column per ``data_sources`` name whose ticker has at least
            one non-NaN Close value; names sharing a ticker each get a column.
        """
        if isinstance(data.columns, pd.MultiIndex):
            close = data['Close']
        elif 'Close' in data.columns:
            close = data[['Close']].copy()
            if len(tickers) == 1:
                # Flat columns carry no ticker label; restore it for the lookup.
                close.columns = list(tickers)
        else:
            close = data

        # Remove any columns that failed to download (entirely NaN)
        close = close.dropna(axis=1, how='all')

        available = set(close.columns)
        by_name = {
            name: close[ticker]
            for name, ticker in self.data_sources.items()
            if ticker in available
        }
        return pd.DataFrame(by_name, index=close.index)

    def _download_individual(self, start_date, end_date):
        """Fallback: download tickers one at a time.

        Returns
        -------
        pd.DataFrame
            One column per ``data_sources`` name that returned more than ten
            Close observations.
        """
        data_dict = {}
        fetched = {}  # ticker -> Series or None, so aliased names reuse one request
        for name, ticker in self.data_sources.items():
            if ticker not in fetched:
                try:
                    fetched[ticker] = self._fetch_close_series(ticker, start_date, end_date)
                except Exception as e:  # best-effort: skip tickers that fail
                    fetched[ticker] = None
                    print(f" ❌ {name}: {str(e)[:50]}")
                    continue
            series = fetched[ticker]
            if series is not None:
                data_dict[name] = series
                print(f" ✅ {name}")
        return pd.DataFrame(data_dict)

    def _fetch_close_series(self, ticker, start_date, end_date):
        """Return the Close series for one ticker, or None if unusable."""
        df = yf.download(ticker, start=start_date, end=end_date,
                         progress=False, auto_adjust=True)
        if df.empty or 'Close' not in df.columns:
            return None
        series = df['Close'].squeeze()
        if isinstance(series, pd.DataFrame):
            series = series.iloc[:, 0]
        # Require more than ten observations, as the original filter did.
        if isinstance(series, pd.Series) and len(series) > 10:
            return series
        return None

    def _download_fred_data(self, start_date, end_date):
        """Download economic data from FRED.

        Returns
        -------
        dict
            Maps each ``fred_series`` name to a date-indexed ``pd.Series`` of
            numeric values; series that fail or have ten or fewer
            observations are omitted.
        """
        url = "https://api.stlouisfed.org/fred/series/observations"
        economic_data = {}
        for name, series_id in self.fred_series.items():
            try:
                params = {
                    'series_id': series_id,
                    'api_key': self.fred_api_key,
                    'file_type': 'json',
                    'observation_start': start_date,
                    'observation_end': end_date,
                }
                response = requests.get(url, params=params, timeout=30)
                if response.status_code != 200:
                    print(f" ❌ {name}: HTTP {response.status_code}")
                    continue
                observations = response.json().get('observations')
                if not observations:
                    continue
                df = pd.DataFrame(observations)
                # Non-numeric placeholders in 'value' become NaN.
                df['value'] = pd.to_numeric(df['value'], errors='coerce')
                df['date'] = pd.to_datetime(df['date'])
                series = df.set_index('date')['value']
                if len(series) > 10:
                    economic_data[name] = series
                    print(f" ✅ {name}")
            except Exception as e:  # best-effort: skip series that fail
                print(f" ❌ {name}: {str(e)[:50]}")
                continue
        return economic_data

    def _merge_all_data(self, market_data, economic_data, start_date, end_date):
        """Merge all data sources into a single DataFrame with ffill + bfill.

        Parameters
        ----------
        market_data : pd.DataFrame
            Date-indexed market series, one column per name.
        economic_data : dict
            Maps names to date-indexed ``pd.Series``; a name that matches a
            market column replaces that column.
        start_date, end_date : str
            Bounds of the daily calendar the result is aligned to.

        Returns
        -------
        pd.DataFrame
            Daily frame (weekends/holidays included). Values are
            forward-filled, then backward-filled, so leading gaps are filled
            with later observations. Columns that remain entirely NaN are
            dropped.
        """
        # Create a full daily date range (including weekends/holidays)
        date_range = pd.date_range(start=start_date, end=end_date, freq='D')

        # Collect aligned columns first and build the frame in one step
        # rather than inserting columns one at a time.
        columns = {}
        for col in market_data.columns:
            columns[col] = self._align_to_calendar(market_data[col], date_range)
        for name, series in economic_data.items():
            columns[name] = self._align_to_calendar(series, date_range)
        unified = pd.DataFrame(columns, index=date_range)

        # Forward-fill, then backward-fill to handle leading/trailing NaNs
        unified = unified.ffill().bfill()

        # Remove any columns that are still entirely NaN (e.g., failed downloads)
        return unified.dropna(axis=1, how='all')

    @staticmethod
    def _align_to_calendar(series, date_range):
        """Reindex one series onto ``date_range``.

        Time-zone information is stripped and duplicate timestamps are
        collapsed (last value wins) so the reindex cannot fail on them.
        """
        index = pd.DatetimeIndex(series.index)
        if index.tz is not None:
            index = index.tz_localize(None)
        aligned = pd.Series(series.to_numpy(), index=index)
        aligned = aligned[~aligned.index.duplicated(keep='last')]
        return aligned.reindex(date_range)
# NOTE: the usage example below is wrapped in a bare triple-quoted string, so
# it is never executed; remove the surrounding quotes to run it as a script.
'''
# ======================
# USAGE EXAMPLE
# ======================
if __name__ == "__main__":
    # Initialize downloader
    downloader = UnifiedMarketDataDownloader(fred_api_key=FRED_API_KEY)
    # Download all data
    raw_market_data = downloader.download_all_data(
        start_date='2018-01-01',
        end_date=None  # defaults to today
    )
    # Save to CSV (optional)
    # raw_market_data.to_csv('unified_market_data.csv')
    # print("\n💾 Saved to: unified_market_data.csv")
'''