Spaces:

JayLacoma
/

Geopolitics-Risk-Analysis

Sleeping

App Files Files Community

Geopolitics-Risk-Analysis / geo_macro.py

JayLacoma

Update geo_macro.py

87076e2 verified 3 months ago

raw

history blame contribute delete

16 kB

	# geo_macro.py

	import os
	import warnings
	from datetime import datetime

	import numpy as np
	import pandas as pd
	import requests
	import yfinance as yf
	warnings.filterwarnings('ignore')

	# ======================
	# CONFIGURATION
	# ======================

	# FRED API key. The FRED_API_KEY environment variable takes precedence so the
	# credential does not have to live in source control.
	# SECURITY NOTE(review): the literal fallback below is a credential committed
	# to the repository. It is kept only so existing deployments keep working;
	# rotate the key and remove the fallback once the environment variable is set.
	FRED_API_KEY = os.environ.get('FRED_API_KEY') or '23f3511b0ca43918ccd503ef64cb844e'


	# ======================
	# UNIFIED DATA DOWNLOADER
	# ======================

	class UnifiedMarketDataDownloader:
	    """Download Yahoo Finance prices and FRED series into one daily DataFrame.

	    ``data_sources`` maps an output column name to a Yahoo Finance ticker and
	    ``fred_series`` maps an output column name to a FRED series id. Several
	    column names deliberately alias the same ticker (for example ``Gold`` and
	    ``Gold_Futures`` both use ``GC=F``); every alias gets its own column in
	    the result, whichever download path is taken.
	    """

	    def __init__(self, fred_api_key=None):
	        """Store the optional FRED API key and define the series catalogues."""
	        self.fred_api_key = fred_api_key

	        # All market data sources (column name -> Yahoo Finance ticker)
	        self.data_sources = {
	            # US Rates & Currencies
	            'DGS10': '^TNX',
	            # NOTE(review): ^FVX is the CBOE 5-year Treasury yield index, not a
	            # 2-year yield; the 'DGS2' label is kept so existing column names
	            # downstream keep working.
	            'DGS2': '^FVX',
	            'DGS3MO': '^IRX',
	            'DXY': 'DX-Y.NYB',
	            'EURUSD': 'EURUSD=X',
	            'JPYUSD': 'JPYUSD=X',

	            # US Equity Indices
	            'SP500': '^GSPC',
	            'NASDAQ': '^IXIC',
	            'RUSSELL': '^RUT',
	            'DJI': '^DJI',
	            'VIX': '^VIX',
	            'VXN': '^VXN',

	            # Commodities
	            'Gold': 'GC=F',
	            'Oil': 'CL=F',
	            'Copper': 'HG=F',
	            'Silver': 'SI=F',
	            'NaturalGas': 'NG=F',

	            # Credit & Fixed Income
	            'HYG': 'HYG',
	            'JNK': 'JNK',
	            'LQD': 'LQD',
	            'TIP': 'TIP',
	            'TLT': 'TLT',

	            # Global Markets
	            'China': 'FXI',
	            'China_Tech': 'KWEB',
	            'Europe': 'FEZ',
	            'Europe_Financials': 'EUFN',
	            'Japan': 'EWJ',
	            'South_Korea': 'EWY',
	            'Taiwan': 'EWT',
	            'India': 'INDA',
	            'Brazil': 'EWZ',
	            'Emerging_Markets': 'EEM',

	            # Global Currencies
	            'CNY': 'CNY=X',
	            'JPY': 'JPY=X',
	            'EUR': 'EUR=X',
	            'GBP': 'GBP=X',

	            # Geopolitical Indicators
	            'Defense_Stocks': 'ITA',
	            'Cybersecurity': 'HACK',
	            'Energy_Security': 'XLE',
	            'Gold_Safe_Haven': 'GLD',
	            'US_Treasuries_Long': 'TLT',

	            # Sectors
	            'Technology': 'XLK',
	            'Financials': 'XLF',
	            'Healthcare': 'XLV',
	            'Consumer_Discretionary': 'XLY',
	            'Consumer_Staples': 'XLP',
	            'Energy': 'XLE',
	            'Materials': 'XLB',
	            'Industrials': 'XLI',
	            'Utilities': 'XLU',
	            'Real_Estate': 'XLRE',
	            'Communication_Services': 'XLC',

	            # Sector Details
	            'Regional_Banks': 'KRE',
	            'Homebuilders': 'XHB',
	            'Retail': 'XRT',
	            'Transportation': 'XTN',
	            'Semiconductors': 'SMH',
	            'Clean_Energy': 'ICLN',
	            'Aerospace_Defense': 'XAR',

	            # Supply Chain & Logistics
	            'Baltic_Dry_Index': 'BDRY',  # Shipping costs proxy
	            'Logistics': 'XTN',

	            # Credit Spreads & Risk
	            'Investment_Grade_Spread': 'LQD',  # Already have, key for credit risk
	            'Emerging_Market_Debt': 'EMB',
	            'Muni_Bonds': 'MUB',

	            # Inflation Breakevens
	            'Inflation_Protected': 'TIP',  # Already have
	            'Short_Term_Treasuries': 'SHY',
	            'Intermediate_Treasuries': 'IEF',

	            # Currency Volatility
	            'USD_Emerging': 'UUP',  # USD strength
	            'Gold_Miners': 'GDX',  # Gold mining companies (more volatile than GLD)

	            # Economic Cycle Indicators
	            'Small_Cap_Value': 'IWN',  # Early cycle indicator
	            'High_Dividend': 'VYM',  # Late cycle/defensive
	            'Growth_Stocks': 'VUG',  # Risk-on
	            'Value_Stocks': 'VTV',  # Risk-off rotation

	            # Liquidity & Credit Conditions
	            'Mortgage_REITs': 'REM',  # Interest rate sensitivity
	            'Preferred_Stock': 'PFF',  # Credit conditions

	            # Global Safe Havens
	            'Swiss_Franc': 'CHF=X',
	            'Gold_Futures': 'GC=F',  # Already have as 'Gold'
	            'Bitcoin': 'BTC-USD',  # Alternative safe haven / risk asset

	            # Commodity Inflation
	            'Agricultural': 'DBA',
	            'Base_Metals': 'DBB',
	            'Crude_Oil': 'USO',

	            # Labor Market
	            'Staffing': 'SIA',  # Staffing index (leading indicator)

	            # Housing Market
	            'Mortgage_Backed_Securities': 'MBB',
	            'REITs': 'VNQ',

	            # Consumer Health
	            'Consumer_Discretionary_vs_Staples': 'XLY',  # Already have
	            'Restaurants': 'EAT',  # Consumer spending proxy
	            'Retail_Luxury': 'RL',  # High-end consumer

	            # Tech Innovation Cycles
	            'Cloud_Computing': 'SKYY',
	            'Robotics_AI': 'BOTZ',
	            'Fintech': 'FINX',

	            # Geopolitical Tension Proxies
	            'Uranium': 'URA',  # Nuclear/energy security
	            'Rare_Earth': 'REMX',  # Supply chain geopolitics
	            'Water': 'PHO',  # Resource scarcity

	            # Sentiment & Positioning
	            'Leveraged_Loans': 'BKLN',  # Credit appetite
	            'TIPS_Spread': 'TIP',  # Inflation expectations
	        }

	        # FRED Economic Series (column name -> FRED series id)
	        self.fred_series = {
	            # Labor Market
	            'UNRATE': 'UNRATE',  # Unemployment Rate
	            'PAYEMS': 'PAYEMS',  # Non-Farm Payrolls
	            'ICSA': 'ICSA',  # Initial Jobless Claims
	            'JTSJOL': 'JTSJOL',  # Job Openings (JOLTS)

	            # Inflation
	            'CPIAUCSL': 'CPIAUCSL',  # CPI
	            'CPILFESL': 'CPILFESL',  # Core CPI
	            'PPIACO': 'PPIACO',  # PPI
	            'PCEPILFE': 'PCEPILFE',  # Core PCE (Fed's preferred)

	            # Production & Manufacturing
	            'INDPRO': 'INDPRO',  # Industrial Production
	            'IPMAN': 'IPMAN',  # Manufacturing Production
	            'TOTALSA': 'TOTALSA',  # Total Vehicle Sales
	            'UMTMVS': 'UMTMVS',  # Manufacturing New Orders

	            # Money & Credit
	            'M2': 'M2SL',  # M2 Money Supply
	            'WALCL': 'WALCL',  # Fed Balance Sheet
	            'TOTCI': 'TOTCI',  # Commercial & Industrial Loans

	            # Consumer
	            'CONSUMER_SENTIMENT': 'UMCSENT',  # Consumer Sentiment
	            'RSXFS': 'RSXFS',  # Retail Sales
	            'PCE': 'PCE',  # Personal Consumption
	            'PSAVERT': 'PSAVERT',  # Personal Saving Rate

	            # Housing
	            'HOUST': 'HOUST',  # Housing Starts
	            'MORTGAGE30US': 'MORTGAGE30US',  # 30-Year Mortgage Rate
	            'CSUSHPISA': 'CSUSHPISA',  # Case-Shiller Home Price Index

	            # Trade & GDP
	            'GDP': 'GDP',  # GDP
	            'NETEXP': 'NETEXP',  # Net Exports
	            'BOPGSTB': 'BOPGSTB',  # Trade Balance

	            # Yield Curve & Credit
	            'T10Y2Y': 'T10Y2Y',  # 10Y-2Y Yield Spread (recession indicator)
	            'T10YIE': 'T10YIE',  # 10Y Breakeven Inflation Rate
	            'BAMLH0A0HYM2': 'BAMLH0A0HYM2',  # High Yield Spread
	            'DFII10': 'DFII10',  # 10-Year TIPS

	            # Leading Indicators
	            'USSLIND': 'USSLIND',  # Leading Index
	            'DCOILWTICO': 'DCOILWTICO',  # WTI Crude Oil Price

	            # Corporate & Business
	            'CPROFIT': 'CPROFIT',  # Corporate Profits
	            'BUSLOANS': 'BUSLOANS',  # Business Loans
	        }

	    def download_all_data(self, start_date='2018-01-01', end_date=None):
	        """
	        Download all market data and return a single unified DataFrame

	        Parameters:
	        -----------
	        start_date : str
	            Start date in 'YYYY-MM-DD' format
	        end_date : str, optional
	            End date in 'YYYY-MM-DD' format (defaults to today)

	        Returns:
	        --------
	        pd.DataFrame
	            Unified DataFrame with all market data, date-indexed
	        """
	        if end_date is None:
	            end_date = datetime.now().strftime('%Y-%m-%d')

	        print("=" * 80)
	        print("🚀 UNIFIED MARKET DATA DOWNLOAD")
	        print("=" * 80)
	        print(f"📅 Period: {start_date} to {end_date}\n")

	        # Step 1: Download all market data
	        print("📊 Downloading Market Data...")
	        market_data = self._download_market_data(start_date, end_date)

	        # Step 2: Download FRED economic data
	        if self.fred_api_key and self.fred_api_key != 'your_api_key_here':
	            print("\n📈 Downloading Economic Data (FRED)...")
	            economic_data = self._download_fred_data(start_date, end_date)
	        else:
	            print("\n⚠️ FRED API key not provided - skipping economic data")
	            print(" Get your free API key at: https://fred.stlouisfed.org/docs/api/api_key.html")
	            economic_data = {}

	        # Step 3: Merge everything into single DataFrame
	        print("\n🔗 Merging all data sources...")
	        unified_df = self._merge_all_data(market_data, economic_data, start_date, end_date)

	        # Summary statistics. Guard the ratio so a frame with no cells (every
	        # download failed, or start_date is after end_date) does not divide by zero.
	        n_rows, n_cols = unified_df.shape
	        missing = int(unified_df.isnull().sum().sum())
	        total_cells = n_rows * n_cols
	        completeness = (1 - missing / total_cells) * 100 if total_cells else 0.0

	        print("\n" + "=" * 80)
	        print("✅ DOWNLOAD COMPLETE")
	        print("=" * 80)
	        print(f"📊 Total Columns: {n_cols}")
	        print(f"📅 Total Rows: {n_rows}")
	        if n_rows:
	            print(f"🗓️ Date Range: {unified_df.index.min().strftime('%Y-%m-%d')} to {unified_df.index.max().strftime('%Y-%m-%d')}")
	        print(f"📉 Missing Values: {missing}")
	        print(f"✨ Completeness: {completeness:.2f}%")

	        print("\n📋 Column Preview:")
	        print(unified_df.columns.tolist()[:10], "...\n")

	        return unified_df

	    def _download_market_data(self, start_date, end_date):
	        """Download all market data from Yahoo Finance in one batch request.

	        Returns a DataFrame of close prices with one column per entry in
	        ``self.data_sources``. If the batch request or its post-processing
	        raises, falls back to ``_download_individual``.
	        """
	        # Several names alias the same ticker; request each ticker only once.
	        all_tickers = list(dict.fromkeys(self.data_sources.values()))

	        print(f" Downloading {len(all_tickers)} tickers...")

	        # Download all at once (faster)
	        try:
	            data = yf.download(all_tickers, start=start_date, end=end_date,
	                               progress=False, auto_adjust=True, threads=True)
	            close_data = self._extract_close_frame(data)

	            print(f" ✅ Successfully downloaded {len(close_data.columns)} series")
	            return close_data

	        except Exception as e:
	            # Best-effort boundary: any batch failure triggers the slower path.
	            print(f" ⚠️ Batch download failed: {e}")
	            print(" Trying individual downloads...")
	            return self._download_individual(start_date, end_date)

	    def _extract_close_frame(self, data):
	        """Turn a raw batch download into a close-price frame keyed by our names.

	        Parameters:
	        -----------
	        data : pd.DataFrame
	            Result of the batch download; either MultiIndex columns of
	            (field, ticker) or flat columns.

	        Returns:
	        --------
	        pd.DataFrame
	            One column per configured name whose ticker returned data. Names
	            that alias the same ticker each get their own copy of the column.
	            Columns that match no configured ticker keep their original label.
	        """
	        if isinstance(data.columns, pd.MultiIndex):
	            close_data = data['Close']
	        else:
	            close_data = data[['Close']] if 'Close' in data.columns else data

	        # Remove any columns that failed to download
	        close_data = close_data.dropna(axis=1, how='all')

	        # Map forward (name -> ticker) rather than inverting the dict, so that
	        # aliases of one ticker are not collapsed onto a single column name.
	        named = {
	            name: close_data[ticker]
	            for name, ticker in self.data_sources.items()
	            if ticker in close_data.columns
	        }

	        # Keep anything we cannot map under its original label.
	        known_tickers = set(self.data_sources.values())
	        for col in close_data.columns:
	            if col not in known_tickers:
	                named[col] = close_data[col]

	        return pd.DataFrame(named, index=close_data.index)

	    def _download_individual(self, start_date, end_date):
	        """Fallback: download tickers individually.

	        Returns a DataFrame with one column per configured name for which more
	        than 10 close prices were retrieved. Each distinct ticker is requested
	        at most once per successful fetch; aliases reuse the cached series.
	        """
	        data_dict = {}
	        fetched = {}  # ticker -> close Series, or None when nothing usable came back

	        for name, ticker in self.data_sources.items():
	            try:
	                if ticker not in fetched:
	                    df = yf.download(ticker, start=start_date, end=end_date,
	                                     progress=False, auto_adjust=True)

	                    usable = None
	                    if not df.empty and 'Close' in df.columns:
	                        candidate = df['Close'].squeeze()
	                        if isinstance(candidate, pd.DataFrame):
	                            candidate = candidate.iloc[:, 0]
	                        # squeeze() can yield a scalar for a one-row frame;
	                        # only keep real series with enough history.
	                        if isinstance(candidate, pd.Series) and len(candidate) > 10:
	                            usable = candidate
	                    fetched[ticker] = usable
	                series = fetched[ticker]
	            except Exception as e:
	                # Best-effort per ticker: report and move on.
	                print(f" ❌ {name}: {str(e)[:50]}")
	                continue

	            if series is None:
	                print(f" ❌ {name}: no usable data")
	                continue

	            data_dict[name] = series
	            print(f" ✅ {name}")

	        return pd.DataFrame(data_dict)

	    def _download_fred_data(self, start_date, end_date):
	        """Download economic data from FRED.

	        Returns a dict mapping each configured name to a date-indexed numeric
	        Series. Series with 10 or fewer observations, non-200 responses and
	        requests that raise are reported and skipped.
	        """
	        url = "https://api.stlouisfed.org/fred/series/observations"
	        economic_data = {}

	        for name, series_id in self.fred_series.items():
	            try:
	                params = {
	                    'series_id': series_id,
	                    'api_key': self.fred_api_key,
	                    'file_type': 'json',
	                    'observation_start': start_date,
	                    'observation_end': end_date,
	                }

	                response = requests.get(url, params=params, timeout=30)

	                if response.status_code != 200:
	                    print(f" ❌ {name}: HTTP {response.status_code}")
	                    continue

	                data = response.json()
	                series = None
	                if 'observations' in data and data['observations']:
	                    df = pd.DataFrame(data['observations'])
	                    # Non-numeric placeholders become NaN instead of raising.
	                    df['value'] = pd.to_numeric(df['value'], errors='coerce')
	                    df['date'] = pd.to_datetime(df['date'])
	                    series = df.set_index('date')['value']

	                if series is not None and len(series) > 10:
	                    economic_data[name] = series
	                    print(f" ✅ {name}")
	                else:
	                    print(f" ❌ {name}: no usable observations")

	            except Exception as e:
	                # Best-effort per series: report and move on.
	                print(f" ❌ {name}: {str(e)[:50]}")
	                continue

	        return economic_data

	    def _merge_all_data(self, market_data, economic_data, start_date, end_date):
	        """Merge all data sources into single DataFrame with ffill + bfill.

	        Parameters:
	        -----------
	        market_data : pd.DataFrame
	            Date-indexed market columns.
	        economic_data : dict
	            Mapping of column name to date-indexed Series. On a name clash the
	            economic series replaces the market column.
	        start_date, end_date : str
	            Bounds of the daily calendar the result is aligned to.

	        Returns:
	        --------
	        pd.DataFrame
	            Daily-indexed frame; columns that are entirely NaN are dropped.

	        Note: the backward fill copies the first available observation into
	        earlier dates, so leading values are not point-in-time accurate.
	        """
	        # Create a full daily date range (including weekends/holidays)
	        date_range = pd.date_range(start=start_date, end=end_date, freq='D')

	        # Collect every column first and build the frame in one step; inserting
	        # 100+ columns one at a time fragments the DataFrame.
	        columns = {col: market_data[col].reindex(date_range) for col in market_data.columns}
	        columns.update(
	            (name, series.reindex(date_range)) for name, series in economic_data.items()
	        )
	        unified = pd.DataFrame(columns, index=date_range)

	        # Forward-fill, then backward-fill to handle leading/trailing NaNs
	        unified = unified.ffill().bfill()

	        # Remove any columns that are still entirely NaN (e.g., failed downloads)
	        unified = unified.dropna(axis=1, how='all')

	        return unified

	'''

	# ======================
	# USAGE EXAMPLE
	# ======================

	if __name__ == "__main__":
	# Initialize downloader
	downloader = UnifiedMarketDataDownloader(fred_api_key=FRED_API_KEY)

	# Download all data
	raw_market_data = downloader.download_all_data(
	start_date='2018-01-01',
	end_date=None # defaults to today
	)



	# Save to CSV (optional)
	# raw_market_data.to_csv('unified_market_data.csv')
	# print("\n💾 Saved to: unified_market_data.csv")

	'''