JayLacoma committed on
Commit
85afe92
·
verified ·
1 Parent(s): ad0e75b

Create geo_macro_df.py

Browse files
Files changed (1) hide show
  1. geo_macro_df.py +422 -0
geo_macro_df.py ADDED
@@ -0,0 +1,422 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # geo_macro_df.py
2
+
3
import os
import warnings
from datetime import datetime

import numpy as np
import pandas as pd
import requests
import yfinance as yf
9
+ warnings.filterwarnings('ignore')
10
+
11
+ # ======================
12
+ # CONFIGURATION
13
+ # ======================
14
+
15
# FRED API key, read from the FRED_API_KEY environment variable so that the
# secret is never committed to source control. When the variable is unset the
# value is None, which UnifiedMarketDataDownloader.download_all_data treats as
# "skip the FRED step". Any key that was previously hard-coded here should be
# considered leaked and rotated.
FRED_API_KEY = os.environ.get('FRED_API_KEY')
17
+
18
+
19
+ # ======================
20
+ # UNIFIED DATA DOWNLOADER
21
+ # ======================
22
+
23
class UnifiedMarketDataDownloader:
    """Download Yahoo Finance prices and FRED series into one daily DataFrame.

    ``data_sources`` maps an output column name to a Yahoo Finance ticker and
    ``fred_series`` maps an output column name to a FRED series id. Several
    column names may deliberately share one ticker (aliases); every configured
    name gets its own column in the result.
    """

    def __init__(self, fred_api_key=None):
        """Store the FRED key and build the name -> ticker / series-id maps.

        Parameters
        ----------
        fred_api_key : str, optional
            FRED API key. When falsy (or the placeholder
            ``'your_api_key_here'``) the FRED download step is skipped.
        """
        self.fred_api_key = fred_api_key

        # All market data sources (column name -> Yahoo Finance ticker)
        self.data_sources = {
            # US Rates & Currencies
            'DGS10': '^TNX',
            # NOTE(review): ^FVX is Yahoo's 5-year Treasury yield index, not
            # the 2-year; the 'DGS2' label is kept so existing column names
            # do not change - confirm the intended tenor.
            'DGS2': '^FVX',
            'DGS3MO': '^IRX',
            'DXY': 'DX-Y.NYB',
            'EURUSD': 'EURUSD=X',
            'JPYUSD': 'JPYUSD=X',

            # US Equity Indices
            'SP500': '^GSPC',
            'NASDAQ': '^IXIC',
            'RUSSELL': '^RUT',
            'DJI': '^DJI',
            'VIX': '^VIX',
            'VXN': '^VXN',

            # Commodities
            'Gold': 'GC=F',
            'Oil': 'CL=F',
            'Copper': 'HG=F',
            'Silver': 'SI=F',
            'NaturalGas': 'NG=F',

            # Credit & Fixed Income
            'HYG': 'HYG',
            'JNK': 'JNK',
            'LQD': 'LQD',
            'TIP': 'TIP',
            'TLT': 'TLT',

            # Global Markets
            'China': 'FXI',
            'China_Tech': 'KWEB',
            'Europe': 'FEZ',
            'Europe_Financials': 'EUFN',
            'Japan': 'EWJ',
            'South_Korea': 'EWY',
            'Taiwan': 'EWT',
            'India': 'INDA',
            'Brazil': 'EWZ',
            'Emerging_Markets': 'EEM',

            # Global Currencies
            'CNY': 'CNY=X',
            'JPY': 'JPY=X',
            'EUR': 'EUR=X',
            'GBP': 'GBP=X',

            # Geopolitical Indicators
            'Defense_Stocks': 'ITA',
            'Cybersecurity': 'HACK',
            'Energy_Security': 'XLE',       # alias of 'Energy'
            'Gold_Safe_Haven': 'GLD',
            'US_Treasuries_Long': 'TLT',    # alias of 'TLT'

            # Sectors
            'Technology': 'XLK',
            'Financials': 'XLF',
            'Healthcare': 'XLV',
            'Consumer_Discretionary': 'XLY',
            'Consumer_Staples': 'XLP',
            'Energy': 'XLE',
            'Materials': 'XLB',
            'Industrials': 'XLI',
            'Utilities': 'XLU',
            'Real_Estate': 'XLRE',
            'Communication_Services': 'XLC',

            # Sector Details
            'Regional_Banks': 'KRE',
            'Homebuilders': 'XHB',
            'Retail': 'XRT',
            'Transportation': 'XTN',
            'Semiconductors': 'SMH',
            'Clean_Energy': 'ICLN',
            'Aerospace_Defense': 'XAR',

            # Supply Chain & Logistics
            'Baltic_Dry_Index': 'BDRY',  # Shipping costs proxy
            'Logistics': 'XTN',          # alias of 'Transportation'

            # Credit Spreads & Risk
            'Investment_Grade_Spread': 'LQD',  # alias of 'LQD', key for credit risk
            'Emerging_Market_Debt': 'EMB',
            'Muni_Bonds': 'MUB',

            # Inflation Breakevens
            'Inflation_Protected': 'TIP',  # alias of 'TIP'
            'Short_Term_Treasuries': 'SHY',
            'Intermediate_Treasuries': 'IEF',

            # Currency Volatility
            'USD_Emerging': 'UUP',  # USD strength
            'Gold_Miners': 'GDX',   # Gold mining companies (more volatile than GLD)

            # Economic Cycle Indicators
            'Small_Cap_Value': 'IWN',  # Early cycle indicator
            'High_Dividend': 'VYM',    # Late cycle/defensive
            'Growth_Stocks': 'VUG',    # Risk-on
            'Value_Stocks': 'VTV',     # Risk-off rotation

            # Liquidity & Credit Conditions
            'Mortgage_REITs': 'REM',   # Interest rate sensitivity
            'Preferred_Stock': 'PFF',  # Credit conditions

            # Global Safe Havens
            'Swiss_Franc': 'CHF=X',
            'Gold_Futures': 'GC=F',  # alias of 'Gold'
            'Bitcoin': 'BTC-USD',    # Alternative safe haven / risk asset

            # Commodity Inflation
            'Agricultural': 'DBA',
            'Base_Metals': 'DBB',
            'Crude_Oil': 'USO',

            # Labor Market
            # NOTE(review): confirm that 'SIA' resolves to the intended
            # staffing series on Yahoo Finance.
            'Staffing': 'SIA',  # Staffing index (leading indicator)

            # Housing Market
            'Mortgage_Backed_Securities': 'MBB',
            'REITs': 'VNQ',

            # Consumer Health
            'Consumer_Discretionary_vs_Staples': 'XLY',  # alias of 'Consumer_Discretionary'
            'Restaurants': 'EAT',    # Consumer spending proxy
            'Retail_Luxury': 'RL',   # High-end consumer

            # Tech Innovation Cycles
            'Cloud_Computing': 'SKYY',
            'Robotics_AI': 'BOTZ',
            'Fintech': 'FINX',

            # Geopolitical Tension Proxies
            'Uranium': 'URA',      # Nuclear/energy security
            'Rare_Earth': 'REMX',  # Supply chain geopolitics
            'Water': 'PHO',        # Resource scarcity

            # Sentiment & Positioning
            'Leveraged_Loans': 'BKLN',  # Credit appetite
            'TIPS_Spread': 'TIP',       # alias of 'TIP'; inflation expectations
        }

        # FRED Economic Series (column name -> FRED series id)
        self.fred_series = {
            # Labor Market
            'UNRATE': 'UNRATE',  # Unemployment Rate
            'PAYEMS': 'PAYEMS',  # Non-Farm Payrolls
            'ICSA': 'ICSA',      # Initial Jobless Claims
            'JTSJOL': 'JTSJOL',  # Job Openings (JOLTS)

            # Inflation
            'CPIAUCSL': 'CPIAUCSL',  # CPI
            'CPILFESL': 'CPILFESL',  # Core CPI
            'PPIACO': 'PPIACO',      # PPI
            'PCEPILFE': 'PCEPILFE',  # Core PCE (Fed's preferred)

            # Production & Manufacturing
            'INDPRO': 'INDPRO',    # Industrial Production
            'IPMAN': 'IPMAN',      # Manufacturing Production
            'TOTALSA': 'TOTALSA',  # Total Vehicle Sales
            'UMTMVS': 'UMTMVS',    # Manufacturing New Orders

            # Money & Credit
            'M2': 'M2SL',      # M2 Money Supply
            'WALCL': 'WALCL',  # Fed Balance Sheet
            'TOTCI': 'TOTCI',  # Commercial & Industrial Loans

            # Consumer
            'CONSUMER_SENTIMENT': 'UMCSENT',  # Consumer Sentiment
            'RSXFS': 'RSXFS',                 # Retail Sales
            'PCE': 'PCE',                     # Personal Consumption
            'PSAVERT': 'PSAVERT',             # Personal Saving Rate

            # Housing
            'HOUST': 'HOUST',                # Housing Starts
            'MORTGAGE30US': 'MORTGAGE30US',  # 30-Year Mortgage Rate
            'CSUSHPISA': 'CSUSHPISA',        # Case-Shiller Home Price Index

            # Trade & GDP
            'GDP': 'GDP',          # GDP
            'NETEXP': 'NETEXP',    # Net Exports
            'BOPGSTB': 'BOPGSTB',  # Trade Balance

            # Yield Curve & Credit
            'T10Y2Y': 'T10Y2Y',              # 10Y-2Y Yield Spread (recession indicator)
            'T10YIE': 'T10YIE',              # 10Y Breakeven Inflation Rate
            'BAMLH0A0HYM2': 'BAMLH0A0HYM2',  # High Yield Spread
            'DFII10': 'DFII10',              # 10-Year TIPS

            # Leading Indicators
            'USSLIND': 'USSLIND',        # Leading Index
            'DCOILWTICO': 'DCOILWTICO',  # WTI Crude Oil Price

            # Corporate & Business
            'CPROFIT': 'CPROFIT',    # Corporate Profits
            'BUSLOANS': 'BUSLOANS',  # Business Loans
        }

    def download_all_data(self, start_date='2018-01-01', end_date=None):
        """
        Download all market data and return a single unified DataFrame

        Parameters
        ----------
        start_date : str
            Start date in 'YYYY-MM-DD' format
        end_date : str, optional
            End date in 'YYYY-MM-DD' format, inclusive (defaults to today)

        Returns
        -------
        pd.DataFrame
            Unified DataFrame with all market data, indexed by calendar day
            from ``start_date`` to ``end_date``.
        """
        if end_date is None:
            end_date = datetime.now().strftime('%Y-%m-%d')

        print("=" * 80)
        print("🚀 UNIFIED MARKET DATA DOWNLOAD")
        print("=" * 80)
        print(f"📅 Period: {start_date} to {end_date}\n")

        # Step 1: Download all market data
        print("📊 Downloading Market Data...")
        market_data = self._download_market_data(start_date, end_date)

        # Step 2: Download FRED economic data
        if self.fred_api_key and self.fred_api_key != 'your_api_key_here':
            print("\n📈 Downloading Economic Data (FRED)...")
            economic_data = self._download_fred_data(start_date, end_date)
        else:
            print("\n⚠️ FRED API key not provided - skipping economic data")
            print(" Get your free API key at: https://fred.stlouisfed.org/docs/api/api_key.html")
            economic_data = {}

        # Step 3: Merge everything into single DataFrame
        print("\n🔗 Merging all data sources...")
        unified_df = self._merge_all_data(market_data, economic_data, start_date, end_date)

        # Summary statistics; guarded so an empty result (every download
        # failed, or start_date after end_date) reports instead of crashing.
        n_rows, n_cols = unified_df.shape
        n_missing = int(unified_df.isnull().sum().sum())
        n_cells = n_rows * n_cols
        completeness = (1 - n_missing / n_cells) * 100 if n_cells else 0.0

        print("\n" + "=" * 80)
        print("✅ DOWNLOAD COMPLETE")
        print("=" * 80)
        print(f"📊 Total Columns: {n_cols}")
        print(f"📅 Total Rows: {n_rows}")
        if n_rows:
            first_day = unified_df.index.min().strftime('%Y-%m-%d')
            last_day = unified_df.index.max().strftime('%Y-%m-%d')
            print(f"🗓️ Date Range: {first_day} to {last_day}")
        else:
            print("🗓️ Date Range: n/a (no rows)")
        print(f"📉 Missing Values: {n_missing}")
        print(f"✨ Completeness: {completeness:.2f}%")

        print("\n📋 Column Preview:")
        print(unified_df.columns.tolist()[:10], "...\n")

        return unified_df

    @staticmethod
    def _yahoo_end_date(end_date):
        """Return the day after *end_date* as a 'YYYY-MM-DD' string.

        yfinance treats ``end`` as exclusive, whereas this class documents
        ``end_date`` as inclusive (and FRED's ``observation_end`` is
        inclusive), so Yahoo requests must ask for one extra day.
        """
        return (pd.Timestamp(end_date) + pd.Timedelta(days=1)).strftime('%Y-%m-%d')

    def _name_close_columns(self, close_data):
        """Turn a ticker-labelled close frame into a name-labelled one.

        Every entry of ``data_sources`` whose ticker is present in
        *close_data* yields one output column, so names that share a ticker
        each receive a copy of that ticker's series. Columns with no data at
        all (failed downloads) are dropped.
        """
        named = {
            name: close_data[ticker]
            for name, ticker in self.data_sources.items()
            if ticker in close_data.columns
        }
        frame = pd.DataFrame(named, index=close_data.index)
        return frame.dropna(axis=1, how='all')

    def _download_market_data(self, start_date, end_date):
        """Download all market data from Yahoo Finance

        Tries one batched request first and falls back to per-ticker
        downloads if the batch raises.

        Returns
        -------
        pd.DataFrame
            Close prices, one column per ``data_sources`` name.
        """
        # Several names share a ticker; request each ticker only once.
        unique_tickers = list(dict.fromkeys(self.data_sources.values()))

        print(f" Downloading {len(unique_tickers)} tickers...")

        # Download all at once (faster)
        try:
            data = yf.download(unique_tickers, start=start_date,
                               end=self._yahoo_end_date(end_date),
                               progress=False, auto_adjust=True, threads=True)

            # Extract Close prices
            if isinstance(data.columns, pd.MultiIndex):
                close_data = data['Close']
            elif 'Close' in data.columns:
                close_data = data[['Close']].copy()
                # Flat columns carry no ticker label; restore it when it is
                # unambiguous so the name mapping below can find it.
                if len(unique_tickers) == 1:
                    close_data.columns = unique_tickers
            else:
                close_data = data

            # Rename to our names and remove columns that failed to download
            close_data = self._name_close_columns(close_data)

            print(f" ✅ Successfully downloaded {len(close_data.columns)} series")
            return close_data

        except Exception as e:
            # Deliberately broad: any batch failure triggers the slower,
            # per-ticker fallback rather than aborting the whole run.
            print(f" ⚠️ Batch download failed: {e}")
            print(" Trying individual downloads...")
            return self._download_individual(start_date, end_date)

    def _download_individual(self, start_date, end_date):
        """Fallback: download tickers individually

        A ticker is kept only if it returns more than 10 close values;
        failures are printed and skipped.
        """
        data_dict = {}
        yahoo_end = self._yahoo_end_date(end_date)

        for name, ticker in self.data_sources.items():
            try:
                df = yf.download(ticker, start=start_date, end=yahoo_end,
                                 progress=False, auto_adjust=True)

                if not df.empty and 'Close' in df.columns:
                    series = df['Close'].squeeze()
                    # squeeze() leaves a DataFrame when there are several
                    # columns; take the first one in that case.
                    if isinstance(series, pd.DataFrame):
                        series = series.iloc[:, 0]
                    if isinstance(series, pd.Series) and len(series) > 10:
                        data_dict[name] = series
                        print(f" ✅ {name}")
            except Exception as e:
                print(f" ❌ {name}: {str(e)[:50]}")
                continue

        return pd.DataFrame(data_dict)

    def _download_fred_data(self, start_date, end_date):
        """Download economic data from FRED

        Returns
        -------
        dict
            Column name -> date-indexed ``pd.Series`` of numeric observations.
            Series with 10 or fewer valid observations, non-200 responses and
            request errors are skipped.
        """
        economic_data = {}
        url = "https://api.stlouisfed.org/fred/series/observations"

        for name, series_id in self.fred_series.items():
            try:
                params = {
                    'series_id': series_id,
                    'api_key': self.fred_api_key,
                    'file_type': 'json',
                    'observation_start': start_date,
                    'observation_end': end_date,
                }

                response = requests.get(url, params=params, timeout=30)

                if response.status_code == 200:
                    data = response.json()
                    if 'observations' in data and data['observations']:
                        df = pd.DataFrame(data['observations'])
                        # Non-numeric placeholders become NaN and are dropped.
                        df['value'] = pd.to_numeric(df['value'], errors='coerce')
                        df['date'] = pd.to_datetime(df['date'])
                        series = df.set_index('date')['value'].dropna()

                        if len(series) > 10:
                            economic_data[name] = series
                            print(f" ✅ {name}")
                else:
                    print(f" ❌ {name}: HTTP {response.status_code}")

            except Exception as e:
                print(f" ❌ {name}: {str(e)[:50]}")
                continue

        return economic_data

    def _merge_all_data(self, market_data, economic_data, start_date, end_date):
        """Merge all data sources into single DataFrame with proper filling

        Parameters
        ----------
        market_data : pd.DataFrame
            Date-indexed market prices.
        economic_data : dict
            Column name -> date-indexed ``pd.Series``.
        start_date, end_date : str
            Inclusive bounds of the output index.

        Returns
        -------
        pd.DataFrame
            One row per calendar day; gaps are forward-filled, then leading
            gaps are back-filled, and all-NaN columns are removed.
        """
        # Base index: every calendar day (weekends and holidays included).
        date_range = pd.date_range(start=start_date, end=end_date, freq='D')

        # Start with market data
        unified = market_data.copy()
        if isinstance(unified.index, pd.DatetimeIndex):
            # A tz-aware index would not align with the tz-naive date_range,
            # and duplicate labels would make reindex raise.
            if unified.index.tz is not None:
                unified.index = unified.index.tz_localize(None)
            unified = unified[~unified.index.duplicated(keep='last')]
        unified = unified.reindex(date_range)

        # Add economic data in a single concat (avoids fragmenting the frame
        # with many one-column inserts). An economic series replaces a market
        # column of the same name.
        economic = pd.DataFrame(
            {name: series.reindex(date_range) for name, series in economic_data.items()},
            index=date_range,
        )
        clashes = [col for col in economic.columns if col in unified.columns]
        unified = pd.concat([unified.drop(columns=clashes), economic], axis=1)

        # Forward fill then backward fill (handles weekends/holidays).
        # NOTE: bfill copies the first available value backwards into leading
        # gaps, i.e. those rows contain information from a later date.
        unified = unified.ffill().bfill()

        # Drop any columns that are still all NaN
        unified = unified.dropna(axis=1, how='all')

        return unified
397
+
398
+
399
+
400
+ '''
401
+
402
+ # ======================
403
+ # USAGE EXAMPLE
404
+ # ======================
405
+
406
+ if __name__ == "__main__":
407
+ # Initialize downloader
408
+ downloader = UnifiedMarketDataDownloader(fred_api_key=FRED_API_KEY)
409
+
410
+ # Download all data
411
+ raw_market_data = downloader.download_all_data(
412
+ start_date='2018-01-01',
413
+ end_date=None # defaults to today
414
+ )
415
+
416
+
417
+
418
+ # Save to CSV (optional)
419
+ # raw_market_data.to_csv('unified_market_data.csv')
420
+ # print("\n💾 Saved to: unified_market_data.csv")
421
+
422
+ '''