# geo_macro.py
"""Unified downloader: market data (Yahoo Finance) + economic data (FRED).

Produces a single daily date-indexed DataFrame combining ~100 market series
and ~30 FRED economic series, forward/backward-filled over a full calendar
date range.
"""

import warnings
from datetime import datetime

import numpy as np
import pandas as pd

warnings.filterwarnings('ignore')

# ======================
# CONFIGURATION
# ======================
# Add your FRED API key here
# NOTE(review): a live API key is hard-coded; move it to an environment
# variable or config file before committing/sharing this module.
FRED_API_KEY = '23f3511b0ca43918ccd503ef64cb844e'


# ======================
# UNIFIED DATA DOWNLOADER
# ======================
class UnifiedMarketDataDownloader:
    """Downloads all market data into a single DataFrame."""

    def __init__(self, fred_api_key=None):
        """
        Parameters
        ----------
        fred_api_key : str, optional
            FRED API key. When missing (or left as the 'your_api_key_here'
            placeholder), the FRED step is skipped entirely.
        """
        self.fred_api_key = fred_api_key

        # All market data sources: friendly column name -> Yahoo Finance ticker.
        # NOTE(review): several names intentionally alias the same ticker
        # (e.g. 'Gold'/'Gold_Futures' -> 'GC=F', 'TIP' appears three times);
        # see _download_market_data for how duplicates are resolved.
        self.data_sources = {
            # US Rates & Currencies
            'DGS10': '^TNX',
            'DGS2': '^FVX',
            'DGS3MO': '^IRX',
            'DXY': 'DX-Y.NYB',
            'EURUSD': 'EURUSD=X',
            'JPYUSD': 'JPYUSD=X',
            # US Equity Indices
            'SP500': '^GSPC',
            'NASDAQ': '^IXIC',
            'RUSSELL': '^RUT',
            'DJI': '^DJI',
            'VIX': '^VIX',
            'VXN': '^VXN',
            # Commodities
            'Gold': 'GC=F',
            'Oil': 'CL=F',
            'Copper': 'HG=F',
            'Silver': 'SI=F',
            'NaturalGas': 'NG=F',
            # Credit & Fixed Income
            'HYG': 'HYG',
            'JNK': 'JNK',
            'LQD': 'LQD',
            'TIP': 'TIP',
            'TLT': 'TLT',
            # Global Markets
            'China': 'FXI',
            'China_Tech': 'KWEB',
            'Europe': 'FEZ',
            'Europe_Financials': 'EUFN',
            'Japan': 'EWJ',
            'South_Korea': 'EWY',
            'Taiwan': 'EWT',
            'India': 'INDA',
            'Brazil': 'EWZ',
            'Emerging_Markets': 'EEM',
            # Global Currencies
            'CNY': 'CNY=X',
            'JPY': 'JPY=X',
            'EUR': 'EUR=X',
            'GBP': 'GBP=X',
            # Geopolitical Indicators
            'Defense_Stocks': 'ITA',
            'Cybersecurity': 'HACK',
            'Energy_Security': 'XLE',
            'Gold_Safe_Haven': 'GLD',
            'US_Treasuries_Long': 'TLT',
            # Sectors
            'Technology': 'XLK',
            'Financials': 'XLF',
            'Healthcare': 'XLV',
            'Consumer_Discretionary': 'XLY',
            'Consumer_Staples': 'XLP',
            'Energy': 'XLE',
            'Materials': 'XLB',
            'Industrials': 'XLI',
            'Utilities': 'XLU',
            'Real_Estate': 'XLRE',
            'Communication_Services': 'XLC',
            # Sector Details
            'Regional_Banks': 'KRE',
            'Homebuilders': 'XHB',
            'Retail': 'XRT',
            'Transportation': 'XTN',
            'Semiconductors': 'SMH',
            'Clean_Energy': 'ICLN',
            'Aerospace_Defense': 'XAR',
            # Supply Chain & Logistics
            'Baltic_Dry_Index': 'BDRY',  # Shipping costs proxy
            'Logistics': 'XTN',
            # Credit Spreads & Risk
            'Investment_Grade_Spread': 'LQD',  # Already have, key for credit risk
            'Emerging_Market_Debt': 'EMB',
            'Muni_Bonds': 'MUB',
            # Inflation Breakevens
            'Inflation_Protected': 'TIP',  # Already have
            'Short_Term_Treasuries': 'SHY',
            'Intermediate_Treasuries': 'IEF',
            # Currency Volatility
            'USD_Emerging': 'UUP',  # USD strength
            'Gold_Miners': 'GDX',  # Gold mining companies (more volatile than GLD)
            # Economic Cycle Indicators
            'Small_Cap_Value': 'IWN',  # Early cycle indicator
            'High_Dividend': 'VYM',  # Late cycle/defensive
            'Growth_Stocks': 'VUG',  # Risk-on
            'Value_Stocks': 'VTV',  # Risk-off rotation
            # Liquidity & Credit Conditions
            'Mortgage_REITs': 'REM',  # Interest rate sensitivity
            'Preferred_Stock': 'PFF',  # Credit conditions
            # Global Safe Havens
            'Swiss_Franc': 'CHF=X',
            'Gold_Futures': 'GC=F',  # Already have as 'Gold'
            'Bitcoin': 'BTC-USD',  # Alternative safe haven / risk asset
            # Commodity Inflation
            'Agricultural': 'DBA',
            'Base_Metals': 'DBB',
            'Crude_Oil': 'USO',
            # Labor Market
            'Staffing': 'SIA',  # Staffing index (leading indicator)
            # Housing Market
            'Mortgage_Backed_Securities': 'MBB',
            'REITs': 'VNQ',
            # Consumer Health
            'Consumer_Discretionary_vs_Staples': 'XLY',  # Already have
            'Restaurants': 'EAT',  # Consumer spending proxy
            'Retail_Luxury': 'RL',  # High-end consumer
            # Tech Innovation Cycles
            'Cloud_Computing': 'SKYY',
            'Robotics_AI': 'BOTZ',
            'Fintech': 'FINX',
            # Geopolitical Tension Proxies
            'Uranium': 'URA',  # Nuclear/energy security
            'Rare_Earth': 'REMX',  # Supply chain geopolitics
            'Water': 'PHO',  # Resource scarcity
            # Sentiment & Positioning
            'Leveraged_Loans': 'BKLN',  # Credit appetite
            'TIPS_Spread': 'TIP',  # Inflation expectations
        }

        # FRED Economic Series: output column name -> FRED series id.
        self.fred_series = {
            # Labor Market
            'UNRATE': 'UNRATE',  # Unemployment Rate
            'PAYEMS': 'PAYEMS',  # Non-Farm Payrolls
            'ICSA': 'ICSA',  # Initial Jobless Claims
            'JTSJOL': 'JTSJOL',  # Job Openings (JOLTS)
            # Inflation
            'CPIAUCSL': 'CPIAUCSL',  # CPI
            'CPILFESL': 'CPILFESL',  # Core CPI
            'PPIACO': 'PPIACO',  # PPI
            'PCEPILFE': 'PCEPILFE',  # Core PCE (Fed's preferred)
            # Production & Manufacturing
            'INDPRO': 'INDPRO',  # Industrial Production
            'IPMAN': 'IPMAN',  # Manufacturing Production
            'TOTALSA': 'TOTALSA',  # Total Vehicle Sales
            'UMTMVS': 'UMTMVS',  # Manufacturing New Orders
            # Money & Credit
            'M2': 'M2SL',  # M2 Money Supply
            'WALCL': 'WALCL',  # Fed Balance Sheet
            'TOTCI': 'TOTCI',  # Commercial & Industrial Loans
            # Consumer
            'CONSUMER_SENTIMENT': 'UMCSENT',  # Consumer Sentiment
            'RSXFS': 'RSXFS',  # Retail Sales
            'PCE': 'PCE',  # Personal Consumption
            'PSAVERT': 'PSAVERT',  # Personal Saving Rate
            # Housing
            'HOUST': 'HOUST',  # Housing Starts
            'MORTGAGE30US': 'MORTGAGE30US',  # 30-Year Mortgage Rate
            'CSUSHPISA': 'CSUSHPISA',  # Case-Shiller Home Price Index
            # Trade & GDP
            'GDP': 'GDP',  # GDP
            'NETEXP': 'NETEXP',  # Net Exports
            'BOPGSTB': 'BOPGSTB',  # Trade Balance
            # Yield Curve & Credit
            'T10Y2Y': 'T10Y2Y',  # 10Y-2Y Yield Spread (recession indicator)
            'T10YIE': 'T10YIE',  # 10Y Breakeven Inflation Rate
            'BAMLH0A0HYM2': 'BAMLH0A0HYM2',  # High Yield Spread
            'DFII10': 'DFII10',  # 10-Year TIPS
            # Leading Indicators
            'USSLIND': 'USSLIND',  # Leading Index
            'DCOILWTICO': 'DCOILWTICO',  # WTI Crude Oil Price
            # Corporate & Business
            'CPROFIT': 'CPROFIT',  # Corporate Profits
            'BUSLOANS': 'BUSLOANS',  # Business Loans
        }

    def download_all_data(self, start_date='2018-01-01', end_date=None):
        """
        Download all market data and return a single unified DataFrame

        Parameters:
        -----------
        start_date : str
            Start date in 'YYYY-MM-DD' format
        end_date : str, optional
            End date in 'YYYY-MM-DD' format (defaults to today)

        Returns:
        --------
        pd.DataFrame
            Unified DataFrame with all market data, date-indexed
        """
        if end_date is None:
            end_date = datetime.now().strftime('%Y-%m-%d')

        print("=" * 80)
        print("šŸš€ UNIFIED MARKET DATA DOWNLOAD")
        print("=" * 80)
        print(f"šŸ“… Period: {start_date} to {end_date}\n")

        # Step 1: Download all market data
        print("šŸ“Š Downloading Market Data...")
        market_data = self._download_market_data(start_date, end_date)

        # Step 2: Download FRED economic data (skipped without a real key)
        if self.fred_api_key and self.fred_api_key != 'your_api_key_here':
            print("\nšŸ“ˆ Downloading Economic Data (FRED)...")
            economic_data = self._download_fred_data(start_date, end_date)
        else:
            print("\nāš ļø FRED API key not provided - skipping economic data")
            print(" Get your free API key at: https://fred.stlouisfed.org/docs/api/api_key.html")
            economic_data = {}

        # Step 3: Merge everything into single DataFrame
        print("\nšŸ”— Merging all data sources...")
        unified_df = self._merge_all_data(market_data, economic_data, start_date, end_date)

        # Print summary
        print("\n" + "=" * 80)
        print("āœ… DOWNLOAD COMPLETE")
        print("=" * 80)
        print(f"šŸ“Š Total Columns: {unified_df.shape[1]}")
        print(f"šŸ“… Total Rows: {unified_df.shape[0]}")
        print(f"šŸ—“ļø Date Range: {unified_df.index.min().strftime('%Y-%m-%d')} to {unified_df.index.max().strftime('%Y-%m-%d')}")
        print(f"šŸ“‰ Missing Values: {unified_df.isnull().sum().sum()}")
        print(f"✨ Completeness: {(1 - unified_df.isnull().sum().sum()/(unified_df.shape[0]*unified_df.shape[1]))*100:.2f}%")
        print("\nšŸ“‹ Column Preview:")
        print(unified_df.columns.tolist()[:10], "...\n")

        return unified_df

    def _download_market_data(self, start_date, end_date):
        """Download all market data from Yahoo Finance in one batch.

        Falls back to per-ticker downloads if the batch request fails.
        """
        # Local import so the module (and the pure merge logic) can be used
        # without yfinance installed.
        import yfinance as yf

        all_tickers = list(self.data_sources.values())
        print(f" Downloading {len(all_tickers)} tickers...")

        # Download all at once (faster)
        try:
            data = yf.download(all_tickers, start=start_date, end=end_date,
                               progress=False, auto_adjust=True, threads=True)

            # Extract Close prices (multi-ticker results use a MultiIndex)
            if isinstance(data.columns, pd.MultiIndex):
                close_data = data['Close']
            else:
                close_data = data[['Close']] if 'Close' in data.columns else data

            # Rename ticker columns to our friendly names.
            # NOTE(review): the reverse map keeps only the LAST name defined
            # for a duplicated ticker (e.g. 'GC=F' ends up as 'Gold_Futures',
            # not 'Gold'), so earlier aliases are silently dropped.
            ticker_to_name = {v: k for k, v in self.data_sources.items()}
            close_data.columns = [ticker_to_name.get(col, col) for col in close_data.columns]

            # BUG FIX: the original did `close_data(axis=1, how='all')`,
            # which calls the DataFrame (TypeError) and therefore ALWAYS fell
            # into the except-branch fallback. Use dropna to remove columns
            # that failed to download.
            close_data = close_data.dropna(axis=1, how='all')

            print(f" āœ… Successfully downloaded {len(close_data.columns)} series")
            return close_data
        except Exception as e:
            print(f" āš ļø Batch download failed: {e}")
            print(" Trying individual downloads...")
            return self._download_individual(start_date, end_date)

    def _download_individual(self, start_date, end_date):
        """Fallback: download tickers individually."""
        import yfinance as yf  # local import; see _download_market_data

        data_dict = {}
        for name, ticker in self.data_sources.items():
            try:
                df = yf.download(ticker, start=start_date, end=end_date,
                                 progress=False, auto_adjust=True)
                if not df.empty and 'Close' in df.columns:
                    series = df['Close'].squeeze()
                    # squeeze() may still return a 1-column frame; take it.
                    if isinstance(series, pd.DataFrame):
                        series = series.iloc[:, 0]
                    # Keep only series with a usable amount of history.
                    if isinstance(series, pd.Series) and len(series) > 10:
                        data_dict[name] = series
                        print(f" āœ… {name}")
            except Exception as e:
                print(f" āŒ {name}: {str(e)[:50]}")
                continue
        return pd.DataFrame(data_dict)

    def _download_fred_data(self, start_date, end_date):
        """Download economic data from FRED.

        Returns a dict of {name: pd.Series} (date-indexed floats).
        """
        import requests  # local import; see _download_market_data

        economic_data = {}
        for name, series_id in self.fred_series.items():
            try:
                url = "https://api.stlouisfed.org/fred/series/observations"
                params = {
                    'series_id': series_id,
                    'api_key': self.fred_api_key,
                    'file_type': 'json',
                    'observation_start': start_date,
                    'observation_end': end_date,
                }
                response = requests.get(url, params=params, timeout=30)
                if response.status_code == 200:
                    data = response.json()
                    if 'observations' in data and data['observations']:
                        df = pd.DataFrame(data['observations'])
                        # FRED uses '.' for missing values; coerce to NaN.
                        df['value'] = pd.to_numeric(df['value'], errors='coerce')
                        df['date'] = pd.to_datetime(df['date'])
                        series = df.set_index('date')['value']
                        if len(series) > 10:
                            economic_data[name] = series
                            print(f" āœ… {name}")
                else:
                    print(f" āŒ {name}: HTTP {response.status_code}")
            except Exception as e:
                print(f" āŒ {name}: {str(e)[:50]}")
                continue
        return economic_data

    def _merge_all_data(self, market_data, economic_data, start_date, end_date):
        """Merge all data sources into single DataFrame with ffill + bfill.

        Parameters
        ----------
        market_data : pd.DataFrame
            Date-indexed market prices (from yfinance).
        economic_data : dict[str, pd.Series]
            Date-indexed FRED series.
        start_date, end_date : str
            'YYYY-MM-DD' bounds of the output calendar range.
        """
        # Create a full daily date range (including weekends/holidays)
        date_range = pd.date_range(start=start_date, end=end_date, freq='D')

        # Initialize unified DataFrame with full date index
        unified = pd.DataFrame(index=date_range)

        # Add market data (already date-indexed from yfinance)
        for col in market_data.columns:
            unified[col] = market_data[col].reindex(date_range)

        # Add FRED economic data
        for name, series in economic_data.items():
            unified[name] = series.reindex(date_range)

        # Forward-fill, then backward-fill to handle leading/trailing NaNs
        unified = unified.ffill().bfill()

        # Remove any columns that are still entirely NaN (e.g., failed downloads)
        unified = unified.dropna(axis=1, how='all')

        return unified


'''
# ======================
# USAGE EXAMPLE
# ======================
if __name__ == "__main__":
    # Initialize downloader
    downloader = UnifiedMarketDataDownloader(fred_api_key=FRED_API_KEY)

    # Download all data
    raw_market_data = downloader.download_all_data(
        start_date='2018-01-01',
        end_date=None  # defaults to today
    )

    # Save to CSV (optional)
    # raw_market_data.to_csv('unified_market_data.csv')
    # print("\nšŸ’¾ Saved to: unified_market_data.csv")
'''