Create geo_macro_df.py
Browse files- geo_macro_df.py +422 -0
geo_macro_df.py
ADDED
|
@@ -0,0 +1,422 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# geo_macro_df.py
|
| 2 |
+
|
| 3 |
+
import os
import warnings
from datetime import datetime

import numpy as np
import pandas as pd
import requests
import yfinance as yf
|
| 9 |
+
# Silence every warning raised after this module is imported (process-wide).
warnings.filterwarnings('ignore')

# ======================
# CONFIGURATION
# ======================

# FRED API key, read from the FRED_API_KEY environment variable when it is set.
# SECURITY NOTE(review): the literal below is a credential committed to source
# control. It is kept only as a backward-compatible fallback for importers of
# this constant; rotate the key and remove the fallback as soon as possible.
FRED_API_KEY = os.environ.get('FRED_API_KEY', '23f3511b0ca43918ccd503ef64cb844e')
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
# ======================
|
| 20 |
+
# UNIFIED DATA DOWNLOADER
|
| 21 |
+
# ======================
|
| 22 |
+
|
| 23 |
+
class UnifiedMarketDataDownloader:
    """Download Yahoo Finance and FRED series into one date-indexed DataFrame.

    ``data_sources`` maps an output column name to a Yahoo Finance ticker and
    ``fred_series`` maps an output column name to a FRED series id. Several
    column names intentionally alias the same ticker (e.g. ``TLT`` and
    ``US_Treasuries_Long``); every alias gets its own column in the output.
    """

    # A downloaded series is kept only if it has MORE than this many points.
    MIN_OBSERVATIONS = 10

    def __init__(self, fred_api_key=None):
        """Store the FRED key and build the name -> ticker / series-id tables.

        Parameters:
        -----------
        fred_api_key : str, optional
            FRED API key. When missing (or left as the placeholder
            'your_api_key_here') the FRED download step is skipped.
        """
        self.fred_api_key = fred_api_key

        # All market data sources (column name -> Yahoo Finance ticker)
        self.data_sources = {
            # US Rates & Currencies
            'DGS10': '^TNX',
            # NOTE(review): Yahoo's ^FVX is the 5-year Treasury yield, so the
            # 'DGS2' label looks mislabelled - confirm before using this
            # column as a 2-year rate. Key left unchanged for compatibility.
            'DGS2': '^FVX',
            'DGS3MO': '^IRX',
            'DXY': 'DX-Y.NYB',
            'EURUSD': 'EURUSD=X',
            'JPYUSD': 'JPYUSD=X',

            # US Equity Indices
            'SP500': '^GSPC',
            'NASDAQ': '^IXIC',
            'RUSSELL': '^RUT',
            'DJI': '^DJI',
            'VIX': '^VIX',
            'VXN': '^VXN',

            # Commodities
            'Gold': 'GC=F',
            'Oil': 'CL=F',
            'Copper': 'HG=F',
            'Silver': 'SI=F',
            'NaturalGas': 'NG=F',

            # Credit & Fixed Income
            'HYG': 'HYG',
            'JNK': 'JNK',
            'LQD': 'LQD',
            'TIP': 'TIP',
            'TLT': 'TLT',

            # Global Markets
            'China': 'FXI',
            'China_Tech': 'KWEB',
            'Europe': 'FEZ',
            'Europe_Financials': 'EUFN',
            'Japan': 'EWJ',
            'South_Korea': 'EWY',
            'Taiwan': 'EWT',
            'India': 'INDA',
            'Brazil': 'EWZ',
            'Emerging_Markets': 'EEM',

            # Global Currencies
            'CNY': 'CNY=X',
            'JPY': 'JPY=X',
            'EUR': 'EUR=X',
            'GBP': 'GBP=X',

            # Geopolitical Indicators
            'Defense_Stocks': 'ITA',
            'Cybersecurity': 'HACK',
            'Energy_Security': 'XLE',
            'Gold_Safe_Haven': 'GLD',
            'US_Treasuries_Long': 'TLT',  # alias of 'TLT'

            # Sectors
            'Technology': 'XLK',
            'Financials': 'XLF',
            'Healthcare': 'XLV',
            'Consumer_Discretionary': 'XLY',
            'Consumer_Staples': 'XLP',
            'Energy': 'XLE',  # same ticker as 'Energy_Security'
            'Materials': 'XLB',
            'Industrials': 'XLI',
            'Utilities': 'XLU',
            'Real_Estate': 'XLRE',
            'Communication_Services': 'XLC',

            # Sector Details
            'Regional_Banks': 'KRE',
            'Homebuilders': 'XHB',
            'Retail': 'XRT',
            'Transportation': 'XTN',
            'Semiconductors': 'SMH',
            'Clean_Energy': 'ICLN',
            'Aerospace_Defense': 'XAR',

            # Supply Chain & Logistics
            'Baltic_Dry_Index': 'BDRY',  # Shipping costs proxy
            'Logistics': 'XTN',  # same ticker as 'Transportation'

            # Credit Spreads & Risk
            'Investment_Grade_Spread': 'LQD',  # alias of 'LQD', key for credit risk
            'Emerging_Market_Debt': 'EMB',
            'Muni_Bonds': 'MUB',

            # Inflation Breakevens
            'Inflation_Protected': 'TIP',  # alias of 'TIP'
            'Short_Term_Treasuries': 'SHY',
            'Intermediate_Treasuries': 'IEF',

            # Currency Volatility
            'USD_Emerging': 'UUP',  # USD strength
            'Gold_Miners': 'GDX',  # Gold mining companies (more volatile than GLD)

            # Economic Cycle Indicators
            'Small_Cap_Value': 'IWN',  # Early cycle indicator
            'High_Dividend': 'VYM',  # Late cycle/defensive
            'Growth_Stocks': 'VUG',  # Risk-on
            'Value_Stocks': 'VTV',  # Risk-off rotation

            # Liquidity & Credit Conditions
            'Mortgage_REITs': 'REM',  # Interest rate sensitivity
            'Preferred_Stock': 'PFF',  # Credit conditions

            # Global Safe Havens
            'Swiss_Franc': 'CHF=X',
            'Gold_Futures': 'GC=F',  # alias of 'Gold'
            'Bitcoin': 'BTC-USD',  # Alternative safe haven / risk asset

            # Commodity Inflation
            'Agricultural': 'DBA',
            'Base_Metals': 'DBB',
            'Crude_Oil': 'USO',

            # Labor Market
            'Staffing': 'SIA',  # Staffing index (leading indicator)

            # Housing Market
            'Mortgage_Backed_Securities': 'MBB',
            'REITs': 'VNQ',

            # Consumer Health
            'Consumer_Discretionary_vs_Staples': 'XLY',  # alias of 'Consumer_Discretionary'
            'Restaurants': 'EAT',  # Consumer spending proxy
            'Retail_Luxury': 'RL',  # High-end consumer

            # Tech Innovation Cycles
            'Cloud_Computing': 'SKYY',
            'Robotics_AI': 'BOTZ',
            'Fintech': 'FINX',

            # Geopolitical Tension Proxies
            'Uranium': 'URA',  # Nuclear/energy security
            'Rare_Earth': 'REMX',  # Supply chain geopolitics
            'Water': 'PHO',  # Resource scarcity

            # Sentiment & Positioning
            'Leveraged_Loans': 'BKLN',  # Credit appetite
            'TIPS_Spread': 'TIP',  # alias of 'TIP' (inflation expectations)
        }

        # FRED Economic Series (column name -> FRED series id)
        self.fred_series = {
            # Labor Market
            'UNRATE': 'UNRATE',  # Unemployment Rate
            'PAYEMS': 'PAYEMS',  # Non-Farm Payrolls
            'ICSA': 'ICSA',  # Initial Jobless Claims
            'JTSJOL': 'JTSJOL',  # Job Openings (JOLTS)

            # Inflation
            'CPIAUCSL': 'CPIAUCSL',  # CPI
            'CPILFESL': 'CPILFESL',  # Core CPI
            'PPIACO': 'PPIACO',  # PPI
            'PCEPILFE': 'PCEPILFE',  # Core PCE (Fed's preferred)

            # Production & Manufacturing
            'INDPRO': 'INDPRO',  # Industrial Production
            'IPMAN': 'IPMAN',  # Manufacturing Production
            'TOTALSA': 'TOTALSA',  # Total Vehicle Sales
            'UMTMVS': 'UMTMVS',  # Manufacturing New Orders

            # Money & Credit
            'M2': 'M2SL',  # M2 Money Supply
            'WALCL': 'WALCL',  # Fed Balance Sheet
            'TOTCI': 'TOTCI',  # Commercial & Industrial Loans

            # Consumer
            'CONSUMER_SENTIMENT': 'UMCSENT',  # Consumer Sentiment
            'RSXFS': 'RSXFS',  # Retail Sales
            'PCE': 'PCE',  # Personal Consumption
            'PSAVERT': 'PSAVERT',  # Personal Saving Rate

            # Housing
            'HOUST': 'HOUST',  # Housing Starts
            'MORTGAGE30US': 'MORTGAGE30US',  # 30-Year Mortgage Rate
            'CSUSHPISA': 'CSUSHPISA',  # Case-Shiller Home Price Index

            # Trade & GDP
            'GDP': 'GDP',  # GDP
            'NETEXP': 'NETEXP',  # Net Exports
            'BOPGSTB': 'BOPGSTB',  # Trade Balance

            # Yield Curve & Credit
            'T10Y2Y': 'T10Y2Y',  # 10Y-2Y Yield Spread (recession indicator)
            'T10YIE': 'T10YIE',  # 10Y Breakeven Inflation Rate
            'BAMLH0A0HYM2': 'BAMLH0A0HYM2',  # High Yield Spread
            'DFII10': 'DFII10',  # 10-Year TIPS

            # Leading Indicators
            'USSLIND': 'USSLIND',  # Leading Index
            'DCOILWTICO': 'DCOILWTICO',  # WTI Crude Oil Price

            # Corporate & Business
            'CPROFIT': 'CPROFIT',  # Corporate Profits
            'BUSLOANS': 'BUSLOANS',  # Business Loans
        }

    def download_all_data(self, start_date='2018-01-01', end_date=None):
        """
        Download all market data and return a single unified DataFrame

        Parameters:
        -----------
        start_date : str
            Start date in 'YYYY-MM-DD' format
        end_date : str, optional
            End date in 'YYYY-MM-DD' format (defaults to today)

        Returns:
        --------
        pd.DataFrame
            Unified DataFrame with all market data, date-indexed
        """
        if end_date is None:
            end_date = datetime.now().strftime('%Y-%m-%d')

        print("=" * 80)
        print("🚀 UNIFIED MARKET DATA DOWNLOAD")
        print("=" * 80)
        print(f"📅 Period: {start_date} to {end_date}\n")

        # Step 1: Download all market data
        print("📊 Downloading Market Data...")
        market_data = self._download_market_data(start_date, end_date)

        # Step 2: Download FRED economic data
        if self.fred_api_key and self.fred_api_key != 'your_api_key_here':
            print("\n📈 Downloading Economic Data (FRED)...")
            economic_data = self._download_fred_data(start_date, end_date)
        else:
            print("\n⚠️ FRED API key not provided - skipping economic data")
            print(" Get your free API key at: https://fred.stlouisfed.org/docs/api/api_key.html")
            economic_data = {}

        # Step 3: Merge everything into single DataFrame
        print("\n🔗 Merging all data sources...")
        unified_df = self._merge_all_data(market_data, economic_data, start_date, end_date)

        self._print_summary(unified_df)
        return unified_df

    def _print_summary(self, unified_df):
        """Print shape, date range and completeness; safe on an empty frame."""
        n_rows, n_cols = unified_df.shape
        missing = int(unified_df.isnull().sum().sum())
        total_cells = n_rows * n_cols
        # Guard the ratio: an empty frame has no cells to be complete over.
        completeness = (1 - missing / total_cells) * 100 if total_cells else 0.0

        print("\n" + "=" * 80)
        print("✅ DOWNLOAD COMPLETE")
        print("=" * 80)
        print(f"📊 Total Columns: {n_cols}")
        print(f"📅 Total Rows: {n_rows}")
        if n_rows:
            first_day = unified_df.index.min().strftime('%Y-%m-%d')
            last_day = unified_df.index.max().strftime('%Y-%m-%d')
            print(f"🗓️ Date Range: {first_day} to {last_day}")
        else:
            # index.min() is NaT on an empty index and cannot be formatted.
            print("🗓️ Date Range: n/a (no rows)")
        print(f"📉 Missing Values: {missing}")
        print(f"✨ Completeness: {completeness:.2f}%")

        print("\n📋 Column Preview:")
        print(unified_df.columns.tolist()[:10], "...\n")

    def _label_close_columns(self, close_data):
        """Relabel ticker columns with the configured names, one column per name.

        Every name in ``data_sources`` whose ticker is present gets its own
        column, so names that alias the same ticker are all kept. Columns whose
        label is not a configured ticker are appended under their original label.
        """
        present = [(name, ticker) for name, ticker in self.data_sources.items()
                   if ticker in close_data.columns]
        known_tickers = {ticker for _, ticker in present}
        extras = [col for col in close_data.columns if col not in known_tickers]

        # Positional selection (tickers may repeat) avoids any index alignment.
        labelled = close_data[[ticker for _, ticker in present] + extras].copy()
        labelled.columns = [name for name, _ in present] + extras
        return labelled

    def _download_market_data(self, start_date, end_date):
        """Download all market data from Yahoo Finance in one batch request.

        Returns a DataFrame of close prices with one column per configured
        name. If the batch request raises, falls back to per-ticker downloads.
        """
        # Several names share a ticker; request each ticker only once.
        all_tickers = list(dict.fromkeys(self.data_sources.values()))

        print(f" Downloading {len(all_tickers)} tickers...")

        # Download all at once (faster)
        try:
            data = yf.download(all_tickers, start=start_date, end=end_date,
                               progress=False, auto_adjust=True, threads=True)

            # Extract Close prices
            if isinstance(data.columns, pd.MultiIndex):
                close_data = data['Close']
            elif 'Close' in data.columns:
                close_data = data[['Close']]
                if len(all_tickers) == 1:
                    # Flat single-ticker frame: label the column by its ticker
                    # so it can be mapped to the configured name(s) below.
                    close_data = close_data.rename(columns={'Close': all_tickers[0]})
            else:
                close_data = data

            # Rename columns to our names (keeping every alias)
            close_data = self._label_close_columns(close_data)

            # Remove any columns that failed to download
            close_data = close_data.dropna(axis=1, how='all')

            print(f" ✅ Successfully downloaded {len(close_data.columns)} series")
            return close_data

        except Exception as e:
            # Deliberate best-effort boundary: any batch failure triggers the
            # slower per-ticker fallback instead of aborting the run.
            print(f" ⚠️ Batch download failed: {e}")
            print(" Trying individual downloads...")
            return self._download_individual(start_date, end_date)

    def _fetch_close_series(self, ticker, start_date, end_date):
        """Download one ticker and return its close Series, or None if unusable."""
        df = yf.download(ticker, start=start_date, end=end_date,
                         progress=False, auto_adjust=True)
        if df.empty or 'Close' not in df.columns:
            return None

        series = df['Close'].squeeze()
        if isinstance(series, pd.DataFrame):
            series = series.iloc[:, 0]
        if isinstance(series, pd.Series) and len(series) > self.MIN_OBSERVATIONS:
            return series
        return None

    def _download_individual(self, start_date, end_date):
        """Fallback: download tickers individually, one request per unique ticker."""
        data_dict = {}
        fetched = {}  # ticker -> Series or None, so aliases reuse one download

        for name, ticker in self.data_sources.items():
            try:
                if ticker not in fetched:
                    fetched[ticker] = self._fetch_close_series(ticker, start_date, end_date)
                series = fetched[ticker]
            except Exception as e:
                # Best-effort: report the failure and move on to the next name.
                print(f" ❌ {name}: {str(e)[:50]}")
                continue

            if series is None:
                print(f" ❌ {name}: no usable data")
                continue

            data_dict[name] = series
            print(f" ✅ {name}")

        return pd.DataFrame(data_dict)

    def _download_fred_data(self, start_date, end_date):
        """Download economic data from FRED.

        Returns a dict mapping each configured name to a date-indexed Series of
        numeric observations. Series that fail or are too short are omitted.
        """
        url = "https://api.stlouisfed.org/fred/series/observations"
        economic_data = {}

        for name, series_id in self.fred_series.items():
            try:
                params = {
                    'series_id': series_id,
                    'api_key': self.fred_api_key,
                    'file_type': 'json',
                    'observation_start': start_date,
                    'observation_end': end_date,
                }

                response = requests.get(url, params=params, timeout=30)

                if response.status_code != 200:
                    print(f" ❌ {name}: HTTP {response.status_code}")
                    continue

                observations = response.json().get('observations')
                if not observations:
                    print(f" ❌ {name}: no observations returned")
                    continue

                df = pd.DataFrame(observations)
                # Non-numeric values are coerced to NaN and then dropped.
                df['value'] = pd.to_numeric(df['value'], errors='coerce')
                df['date'] = pd.to_datetime(df['date'])
                series = df.set_index('date')['value'].dropna()

                if len(series) > self.MIN_OBSERVATIONS:
                    economic_data[name] = series
                    print(f" ✅ {name}")
                else:
                    print(f" ❌ {name}: only {len(series)} observations")

            except Exception as e:
                # Best-effort: one failing series must not abort the others.
                print(f" ❌ {name}: {str(e)[:50]}")
                continue

        return economic_data

    def _merge_all_data(self, market_data, economic_data, start_date, end_date):
        """Merge all data sources into single DataFrame with proper filling.

        Both inputs are reindexed onto a daily calendar from ``start_date`` to
        ``end_date`` (inclusive), then gaps are forward- and backward-filled.
        """
        # Base index is CALENDAR days (freq='D'), not business days, so
        # observations dated on weekends are kept.
        date_range = pd.date_range(start=start_date, end=end_date, freq='D')

        # Start with market data
        unified = market_data.copy()
        unified = unified.reindex(date_range)

        # Add economic data
        for name, series in economic_data.items():
            unified[name] = series.reindex(date_range)

        # Forward fill then backward fill (handles weekends/holidays).
        # NOTE: the backward fill copies the first available value into earlier
        # dates, i.e. leading gaps are filled with later data.
        unified = unified.ffill().bfill()

        # Drop any columns that are still all NaN
        unified = unified.dropna(axis=1, how='all')

        return unified
|
| 397 |
+
|
| 398 |
+
|
| 399 |
+
|
| 400 |
+
'''
|
| 401 |
+
|
| 402 |
+
# ======================
|
| 403 |
+
# USAGE EXAMPLE
|
| 404 |
+
# ======================
|
| 405 |
+
|
| 406 |
+
if __name__ == "__main__":
|
| 407 |
+
# Initialize downloader
|
| 408 |
+
downloader = UnifiedMarketDataDownloader(fred_api_key=FRED_API_KEY)
|
| 409 |
+
|
| 410 |
+
# Download all data
|
| 411 |
+
raw_market_data = downloader.download_all_data(
|
| 412 |
+
start_date='2018-01-01',
|
| 413 |
+
end_date=None # defaults to today
|
| 414 |
+
)
|
| 415 |
+
|
| 416 |
+
|
| 417 |
+
|
| 418 |
+
# Save to CSV (optional)
|
| 419 |
+
# raw_market_data.to_csv('unified_market_data.csv')
|
| 420 |
+
# print("\n💾 Saved to: unified_market_data.csv")
|
| 421 |
+
|
| 422 |
+
'''
|