"""Enhanced data handlers for multiple geospatial data sources."""

import json
from typing import Dict, List, Optional

import pandas as pd

try:
    import requests
except ImportError:  # pragma: no cover - depends on the environment
    # Nothing in this module calls `requests` yet (the data below is
    # hard-coded sample data), so a missing install must not make the
    # offline helpers unimportable.
    requests = None


class DataEnhancer:
    """Additional data sources and enrichment for geospatial queries."""

    # Every indicator column that enrich_dataframe guarantees on its output,
    # in the order the columns are appended.
    _ALL_INDICATORS = (
        'gdp_growth', 'unemployment', 'inflation',
        'co2_per_capita', 'renewable_energy', 'forest_coverage',
    )

    # Alternative spellings of a country name -> key used in the sample data.
    # COUNTRY_GROUPS in this file spells the country 'United States of
    # America', so frames using that spelling would otherwise never match.
    _NAME_ALIASES = {
        'United States of America': 'United States',
    }

    @staticmethod
    def get_sample_economic_data():
        """Return sample economic indicators keyed by country name.

        In production this should be replaced by a World Bank API call.

        Returns:
            A new dict mapping country name to a dict with the keys
            'gdp_growth', 'unemployment' and 'inflation'.
        """
        return {
            'United States': {'gdp_growth': 2.1, 'unemployment': 3.7, 'inflation': 3.2},
            'China': {'gdp_growth': 5.2, 'unemployment': 5.0, 'inflation': 0.2},
            'Germany': {'gdp_growth': 0.1, 'unemployment': 3.0, 'inflation': 6.1},
            'India': {'gdp_growth': 7.2, 'unemployment': 8.0, 'inflation': 5.4},
            'Brazil': {'gdp_growth': 2.9, 'unemployment': 8.5, 'inflation': 4.6},
            'United Kingdom': {'gdp_growth': 0.5, 'unemployment': 3.9, 'inflation': 4.0},
            'France': {'gdp_growth': 0.9, 'unemployment': 7.2, 'inflation': 5.2},
            'Japan': {'gdp_growth': 1.9, 'unemployment': 2.6, 'inflation': 3.2},
            'South Korea': {'gdp_growth': 1.4, 'unemployment': 2.7, 'inflation': 3.6},
            'Canada': {'gdp_growth': 1.1, 'unemployment': 5.4, 'inflation': 3.9},
        }

    @staticmethod
    def get_sample_environmental_data():
        """Return sample environmental indicators keyed by country name.

        Returns:
            A new dict mapping country name to a dict with the keys
            'co2_per_capita', 'renewable_energy' and 'forest_coverage'.
        """
        return {
            'United States': {'co2_per_capita': 15.5, 'renewable_energy': 12.6, 'forest_coverage': 33.9},
            'China': {'co2_per_capita': 7.4, 'renewable_energy': 12.4, 'forest_coverage': 23.0},
            'Germany': {'co2_per_capita': 8.4, 'renewable_energy': 19.3, 'forest_coverage': 32.7},
            'India': {'co2_per_capita': 1.9, 'renewable_energy': 17.5, 'forest_coverage': 24.4},
            'Brazil': {'co2_per_capita': 2.2, 'renewable_energy': 46.1, 'forest_coverage': 59.4},
            'Russia': {'co2_per_capita': 11.4, 'renewable_energy': 5.1, 'forest_coverage': 49.8},
            'Japan': {'co2_per_capita': 8.7, 'renewable_energy': 10.2, 'forest_coverage': 68.5},
            'Australia': {'co2_per_capita': 16.8, 'renewable_energy': 11.9, 'forest_coverage': 17.4},
        }

    @staticmethod
    def enrich_dataframe(df: pd.DataFrame, data_type: str = 'economic') -> pd.DataFrame:
        """Return a copy of *df* enriched with additional indicator columns.

        Rows are matched on the 'name' column. Only the indicators that
        belong to the selected dataset are (re)computed; the remaining
        indicator columns are created empty when they do not exist yet and
        are otherwise left alone, so enriching the same frame with a second
        dataset does not erase the values written by the first.

        Args:
            df: Frame with a 'name' column holding country names.
            data_type: 'economic' or 'environmental'. Any other value
                returns an unmodified copy of *df*.

        Returns:
            A new DataFrame; *df* itself is never modified.

        Raises:
            KeyError: If *df* has no 'name' column and *data_type* is one of
                the supported values.
        """
        enriched_df = df.copy()

        if data_type == 'economic':
            extra_data = DataEnhancer.get_sample_economic_data()
        elif data_type == 'environmental':
            extra_data = DataEnhancer.get_sample_environmental_data()
        else:
            return enriched_df

        # The getters build a fresh dict on every call, so adding alias keys
        # here cannot leak into later calls.
        for alias, canonical in DataEnhancer._NAME_ALIASES.items():
            if canonical in extra_data and alias not in extra_data:
                extra_data[alias] = extra_data[canonical]

        available = {
            indicator
            for values in extra_data.values()
            for indicator in values
        }

        for indicator in DataEnhancer._ALL_INDICATORS:
            if indicator in available:
                lookup = {
                    country: values.get(indicator)
                    for country, values in extra_data.items()
                }
                enriched_df[indicator] = enriched_df['name'].map(lookup)
            elif indicator not in enriched_df.columns:
                # Keep the historical output shape (all six columns present)
                # without clobbering a column filled by an earlier pass.
                enriched_df[indicator] = None

        return enriched_df

    @staticmethod
    def get_regional_aggregates(df: pd.DataFrame) -> pd.DataFrame:
        """Calculate per-continent totals and GDP per capita.

        Args:
            df: Frame with 'continent', 'pop_est', 'gdp_md_est' and 'name'
                columns.

        Returns:
            One row per continent with the columns 'continent',
            'total_population', 'total_gdp', 'country_count' and
            'avg_gdp_per_capita'.
        """
        regional_stats = (
            df.groupby('continent')
            .agg(
                total_population=('pop_est', 'sum'),
                total_gdp=('gdp_md_est', 'sum'),
                country_count=('name', 'count'),
            )
            .reset_index()
        )
        # NOTE(review): the 1e6 factor presumably converts 'gdp_md_est' from
        # millions to single currency units - confirm against the data source.
        regional_stats['avg_gdp_per_capita'] = (
            regional_stats['total_gdp'] / regional_stats['total_population'] * 1000000
        )
        return regional_stats


class QueryEnhancer:
    """Enhance and validate queries."""

    # Lower-case continent name -> canonical spelling.
    CONTINENT_MAP = {
        'asia': 'Asia',
        'europe': 'Europe',
        'africa': 'Africa',
        'north america': 'North America',
        'south america': 'South America',
        'oceania': 'Oceania',
        'antarctica': 'Antarctica',
    }

    # Lower-case group name -> member country names.
    COUNTRY_GROUPS = {
        'brics': ['Brazil', 'Russia', 'India', 'China', 'South Africa'],
        'g7': ['United States of America', 'Japan', 'Germany', 'United Kingdom',
               'France', 'Italy', 'Canada'],
        'asean': ['Indonesia', 'Thailand', 'Philippines', 'Vietnam', 'Myanmar',
                  'Malaysia', 'Singapore', 'Cambodia', 'Laos', 'Brunei'],
        'gcc': ['Saudi Arabia', 'United Arab Emirates', 'Kuwait', 'Qatar',
                'Bahrain', 'Oman'],
        'eu': ['Germany', 'France', 'Italy', 'Spain', 'Poland', 'Romania',
               'Netherlands', 'Belgium', 'Greece', 'Portugal', 'Czech Republic',
               'Hungary', 'Sweden', 'Austria', 'Bulgaria', 'Denmark', 'Finland',
               'Slovakia', 'Ireland', 'Croatia', 'Lithuania', 'Slovenia',
               'Latvia', 'Estonia', 'Cyprus', 'Luxembourg', 'Malta'],
    }

    # (keyword, column) pairs checked in order; the first keyword contained
    # in the indicator text wins. Specific keywords precede the generic
    # 'gdp' so that "gdp per capita", "gdp growth" and "co2 per capita" are
    # not captured by a broader keyword, and 'density' precedes 'population'
    # so that "population density" resolves to the density column.
    _INDICATOR_KEYWORDS = (
        ('co2', 'co2_per_capita'),
        ('density', 'pop_density'),
        ('population', 'pop_est'),
        ('per capita', 'gdp_per_capita'),
        ('growth', 'gdp_growth'),
        ('renewable', 'renewable_energy'),
        ('forest', 'forest_coverage'),
        ('unemployment', 'unemployment'),
        ('inflation', 'inflation'),
        ('gdp', 'gdp_md_est'),
    )

    @classmethod
    def expand_location(cls, location: str) -> List[str]:
        """Expand a location string to actual country/region names.

        The lookup ignores case and surrounding whitespace.

        Args:
            location: A continent name, a country-group name (for example
                'brics' or 'g7') or any other location string.

        Returns:
            A one-element list with the canonical continent name, a new list
            of the group's member countries, or ``[location]`` unchanged
            when nothing matches.
        """
        key = location.strip().lower()

        if key in cls.CONTINENT_MAP:
            return [cls.CONTINENT_MAP[key]]

        if key in cls.COUNTRY_GROUPS:
            # Hand out a copy so callers cannot mutate the class constant.
            return list(cls.COUNTRY_GROUPS[key])

        return [location]

    @classmethod
    def validate_indicators(cls, indicators: List[str]) -> List[str]:
        """Validate and normalize indicator names to dataframe columns.

        Matching is case-insensitive and treats '_' and '-' as spaces, so
        both "GDP per capita" and "gdp_per_capita" resolve to
        'gdp_per_capita'. Text that matches no keyword falls back to
        'pop_est'.

        Args:
            indicators: Free-text indicator names.

        Returns:
            The matching column names without duplicates, in the order in
            which they were first requested.
        """
        resolved = []
        for indicator in indicators:
            text = indicator.lower().replace('_', ' ').replace('-', ' ')
            column = next(
                (col for keyword, col in cls._INDICATOR_KEYWORDS if keyword in text),
                'pop_est',  # default
            )
            resolved.append(column)

        # dict.fromkeys removes duplicates and, unlike set(), keeps a
        # deterministic order.
        return list(dict.fromkeys(resolved))


# Statistical analysis utilities
class GeoStats:
    """Statistical analysis for geospatial data."""

    @staticmethod
    def calculate_correlation(df: pd.DataFrame, col1: str, col2: str) -> float:
        """Calculate the correlation between two indicators.

        Args:
            df: Frame containing both columns.
            col1: Name of the first column.
            col2: Name of the second column.

        Returns:
            The correlation coefficient from ``DataFrame.corr``, or 0.0 when
            a column is missing or cannot be correlated.
        """
        try:
            return float(df[[col1, col2]].corr().iloc[0, 1])
        except (KeyError, IndexError, TypeError, ValueError):
            # Best effort by design: missing or non-numeric columns yield
            # 0.0, but unrelated errors such as KeyboardInterrupt are no
            # longer swallowed.
            return 0.0

    @staticmethod
    def get_outliers(df: pd.DataFrame, column: str) -> pd.DataFrame:
        """Identify outliers using the IQR method.

        Args:
            df: Frame to inspect.
            column: Name of the numeric column to test.

        Returns:
            The rows of *df* whose value lies more than 1.5 * IQR below the
            first quartile or above the third quartile.
        """
        values = df[column]
        q1 = values.quantile(0.25)
        q3 = values.quantile(0.75)
        spread = 1.5 * (q3 - q1)
        too_low = values < q1 - spread
        too_high = values > q3 + spread
        return df[too_low | too_high]

    @staticmethod
    def generate_summary_stats(df: pd.DataFrame, column: str) -> Dict:
        """Generate summary statistics for a column.

        Args:
            df: Frame to inspect.
            column: Name of the column to summarize.

        Returns:
            A dict with the keys 'mean', 'median', 'std', 'min', 'max' and
            'count'.
        """
        values = df[column]
        return {
            'mean': values.mean(),
            'median': values.median(),
            'std': values.std(),
            'min': values.min(),
            'max': values.max(),
            'count': values.count(),
        }