"""
Enhanced data handlers for multiple geospatial data sources
"""
import pandas as pd
import requests
from typing import Dict, List, Optional
import json

class DataEnhancer:
    """
    Additional data sources and enrichment for geospatial queries.

    Every method is a ``@staticmethod``; the class only serves as a namespace.
    """

    @staticmethod
    def get_sample_economic_data():
        """
        Sample economic indicators (in production, connect to World Bank API)

        Returns:
            A newly built dict keyed by country name. Each value is a dict
            with the keys 'gdp_growth', 'unemployment' and 'inflation'.
        """
        return {
            'United States': {'gdp_growth': 2.1, 'unemployment': 3.7, 'inflation': 3.2},
            'China': {'gdp_growth': 5.2, 'unemployment': 5.0, 'inflation': 0.2},
            'Germany': {'gdp_growth': 0.1, 'unemployment': 3.0, 'inflation': 6.1},
            'India': {'gdp_growth': 7.2, 'unemployment': 8.0, 'inflation': 5.4},
            'Brazil': {'gdp_growth': 2.9, 'unemployment': 8.5, 'inflation': 4.6},
            'United Kingdom': {'gdp_growth': 0.5, 'unemployment': 3.9, 'inflation': 4.0},
            'France': {'gdp_growth': 0.9, 'unemployment': 7.2, 'inflation': 5.2},
            'Japan': {'gdp_growth': 1.9, 'unemployment': 2.6, 'inflation': 3.2},
            'South Korea': {'gdp_growth': 1.4, 'unemployment': 2.7, 'inflation': 3.6},
            'Canada': {'gdp_growth': 1.1, 'unemployment': 5.4, 'inflation': 3.9}
        }

    @staticmethod
    def get_sample_environmental_data():
        """
        Sample environmental indicators

        Returns:
            A newly built dict keyed by country name. Each value is a dict
            with the keys 'co2_per_capita', 'renewable_energy' and
            'forest_coverage'.
        """
        return {
            'United States': {'co2_per_capita': 15.5, 'renewable_energy': 12.6, 'forest_coverage': 33.9},
            'China': {'co2_per_capita': 7.4, 'renewable_energy': 12.4, 'forest_coverage': 23.0},
            'Germany': {'co2_per_capita': 8.4, 'renewable_energy': 19.3, 'forest_coverage': 32.7},
            'India': {'co2_per_capita': 1.9, 'renewable_energy': 17.5, 'forest_coverage': 24.4},
            'Brazil': {'co2_per_capita': 2.2, 'renewable_energy': 46.1, 'forest_coverage': 59.4},
            'Russia': {'co2_per_capita': 11.4, 'renewable_energy': 5.1, 'forest_coverage': 49.8},
            'Japan': {'co2_per_capita': 8.7, 'renewable_energy': 10.2, 'forest_coverage': 68.5},
            'Australia': {'co2_per_capita': 16.8, 'renewable_energy': 11.9, 'forest_coverage': 17.4}
        }

    @staticmethod
    def enrich_dataframe(df: pd.DataFrame, data_type: str = 'economic') -> pd.DataFrame:
        """
        Enrich existing dataframe with additional indicators

        Rows are matched to the sample data by exact equality on the 'name'
        column; names without sample data get a missing value.

        Args:
            df: Input frame; must contain a 'name' column. It is not modified.
            data_type: 'economic' or 'environmental'. Any other value returns
                an unmodified copy of ``df``.

        Returns:
            A copy of ``df`` that contains all six indicator columns. Only the
            columns belonging to ``data_type`` are (re)computed; the other
            indicator columns are created empty if absent and otherwise left
            untouched, so chaining an 'economic' and an 'environmental' call
            keeps the results of both.

        Raises:
            KeyError: if ``df`` has no 'name' column (for a known data_type).
        """
        enriched_df = df.copy()

        if data_type == 'economic':
            extra_data = DataEnhancer.get_sample_economic_data()
        elif data_type == 'environmental':
            extra_data = DataEnhancer.get_sample_environmental_data()
        else:
            return enriched_df

        all_indicators = ['gdp_growth', 'unemployment', 'inflation',
                          'co2_per_capita', 'renewable_energy', 'forest_coverage']
        # Indicators the selected source can actually supply.
        provided = {key for values in extra_data.values() for key in values}

        for indicator in all_indicators:
            if indicator in provided:
                # Bind the loop variable as a default so the lambda does not
                # depend on late binding of ``indicator``.
                enriched_df[indicator] = enriched_df['name'].map(
                    lambda country, key=indicator: extra_data.get(country, {}).get(key)
                )
            elif indicator not in enriched_df.columns:
                # Keep the six-column output shape without clobbering values
                # written by an earlier enrichment of the other data_type.
                enriched_df[indicator] = None

        return enriched_df

    @staticmethod
    def get_regional_aggregates(df: pd.DataFrame) -> pd.DataFrame:
        """
        Calculate regional aggregates

        Args:
            df: Frame with 'continent', 'pop_est', 'gdp_md_est' and 'name'
                columns.

        Returns:
            One row per distinct 'continent' with the columns 'continent',
            'total_population' (sum of pop_est), 'total_gdp' (sum of
            gdp_md_est), 'country_count' (non-null 'name' count) and
            'avg_gdp_per_capita' (total_gdp / total_population * 1,000,000).
        """
        # Named aggregation ties each output name to its source column, so
        # the result does not depend on positional column renaming.
        regional_stats = df.groupby('continent').agg(
            total_population=('pop_est', 'sum'),
            total_gdp=('gdp_md_est', 'sum'),
            country_count=('name', 'count'),
        ).reset_index()

        # NOTE(review): the 1,000,000 factor presumably converts gdp_md_est
        # from millions to single currency units -- confirm against the data.
        regional_stats['avg_gdp_per_capita'] = (
            regional_stats['total_gdp'] / regional_stats['total_population'] * 1000000
        )

        return regional_stats

class QueryEnhancer:
    """
    Enhance and validate queries

    Holds lookup tables for continents and country groups, plus helpers that
    translate free-text locations and indicator names into canonical values.
    """

    # Lower-case continent name -> canonical spelling.
    CONTINENT_MAP = {
        'asia': 'Asia',
        'europe': 'Europe',
        'africa': 'Africa',
        'north america': 'North America',
        'south america': 'South America',
        'oceania': 'Oceania',
        'antarctica': 'Antarctica'
    }

    # Lower-case group name -> member country names.
    COUNTRY_GROUPS = {
        'brics': ['Brazil', 'Russia', 'India', 'China', 'South Africa'],
        'g7': ['United States of America', 'Japan', 'Germany', 'United Kingdom', 
                'France', 'Italy', 'Canada'],
        'asean': ['Indonesia', 'Thailand', 'Philippines', 'Vietnam', 'Myanmar',
                    'Malaysia', 'Singapore', 'Cambodia', 'Laos', 'Brunei'],
        'gcc': ['Saudi Arabia', 'United Arab Emirates', 'Kuwait', 'Qatar', 'Bahrain', 'Oman'],
        'eu': ['Germany', 'France', 'Italy', 'Spain', 'Poland', 'Romania', 'Netherlands',
                'Belgium', 'Greece', 'Portugal', 'Czech Republic', 'Hungary', 'Sweden',
                'Austria', 'Bulgaria', 'Denmark', 'Finland', 'Slovakia', 'Ireland',
                'Croatia', 'Lithuania', 'Slovenia', 'Latvia', 'Estonia', 'Cyprus', 
                'Luxembourg', 'Malta']
    }

    # (keyword, column) pairs checked in order; the first keyword contained in
    # the request wins. More specific keywords come first so that, e.g.,
    # "gdp per capita" is not captured by the bare "gdp" keyword and
    # "co2 per capita" is not captured by "per capita".
    _INDICATOR_KEYWORDS = (
        ('co2', 'co2_per_capita'),
        ('renewable', 'renewable_energy'),
        ('forest', 'forest_coverage'),
        ('unemployment', 'unemployment'),
        ('inflation', 'inflation'),
        ('density', 'pop_density'),
        ('per capita', 'gdp_per_capita'),
        ('population', 'pop_est'),
        ('growth', 'gdp_growth'),
        ('gdp', 'gdp_md_est'),
    )

    @classmethod
    def expand_location(cls, location: str) -> List[str]:
        """
        Expand location strings to actual country/region names

        Matching ignores case and surrounding whitespace.

        Args:
            location: A continent name, a country-group name (e.g. 'brics',
                'g7', 'eu'), or any other location string.

        Returns:
            A new list: the canonical continent name, the members of the
            country group, or the (whitespace-stripped) input itself when it
            matches neither table. The list is never the class-level one, so
            callers may mutate it freely.
        """
        stripped = location.strip()
        key = stripped.lower()

        # Check if it's a continent
        if key in cls.CONTINENT_MAP:
            return [cls.CONTINENT_MAP[key]]

        # Check if it's a country group; copy so the shared table stays intact
        if key in cls.COUNTRY_GROUPS:
            return list(cls.COUNTRY_GROUPS[key])

        # Return as-is
        return [stripped]

    @classmethod
    def validate_indicators(cls, indicators: List[str]) -> List[str]:
        """
        Validate and normalize indicator names

        Each requested name is lower-cased, underscores are treated as
        spaces, and the first keyword of ``_INDICATOR_KEYWORDS`` contained in
        it selects the column. Names matching no keyword fall back to
        'pop_est'.

        Args:
            indicators: Free-text indicator names.

        Returns:
            Column names without duplicates, in order of first appearance.
        """
        resolved = []
        for indicator in indicators:
            # Treat "gdp_per_capita" like "gdp per capita".
            text = indicator.lower().replace('_', ' ')
            for keyword, column in cls._INDICATOR_KEYWORDS:
                if keyword in text:
                    resolved.append(column)
                    break
            else:
                resolved.append('pop_est')  # default

        # dict.fromkeys removes duplicates while keeping a deterministic order.
        return list(dict.fromkeys(resolved))

# Statistical analysis utilities
class GeoStats:
    """
    Statistical analysis for geospatial data

    Every method is a ``@staticmethod`` operating on a caller-supplied frame.
    """

    @staticmethod
    def calculate_correlation(df: pd.DataFrame, col1: str, col2: str) -> float:
        """
        Calculate correlation between two indicators

        Args:
            df: Frame containing both columns.
            col1: Name of the first column.
            col2: Name of the second column.

        Returns:
            The off-diagonal entry of ``df[[col1, col2]].corr()``, or 0.0 when
            the columns are missing or cannot be correlated. The value may be
            NaN if pandas cannot compute a correlation from the data.
        """
        try:
            return df[[col1, col2]].corr().iloc[0, 1]
        except (KeyError, IndexError, TypeError, ValueError):
            # Missing or non-numeric columns: keep the best-effort 0.0
            # fallback, but let KeyboardInterrupt/SystemExit and unrelated
            # errors propagate instead of hiding them.
            return 0.0

    @staticmethod
    def get_outliers(df: pd.DataFrame, column: str) -> pd.DataFrame:
        """
        Identify outliers using IQR method

        Args:
            df: Frame to filter.
            column: Name of the numeric column to examine.

        Returns:
            The rows of ``df`` whose ``column`` value lies strictly below
            Q1 - 1.5 * IQR or strictly above Q3 + 1.5 * IQR.
        """
        values = df[column]
        q1 = values.quantile(0.25)
        q3 = values.quantile(0.75)
        iqr = q3 - q1

        lower_bound = q1 - 1.5 * iqr
        upper_bound = q3 + 1.5 * iqr

        return df[(values < lower_bound) | (values > upper_bound)]

    @staticmethod
    def generate_summary_stats(df: pd.DataFrame, column: str) -> Dict:
        """
        Generate summary statistics for a column

        Args:
            df: Frame containing ``column``.
            column: Name of the column to summarise.

        Returns:
            Dict with the keys 'mean', 'median', 'std', 'min', 'max' and
            'count', each holding the result of the pandas Series method of
            the same name.
        """
        values = df[column]
        return {
            'mean': values.mean(),
            'median': values.median(),
            'std': values.std(),
            'min': values.min(),
            'max': values.max(),
            'count': values.count()
        }