"""
Enhanced Telecommunications Data Generator - Complete Implementation
Implements all data sources from Technical Requirements Document
"""

import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from faker import Faker
import random
from tqdm import tqdm

# One seed value shared by every random source this module uses: NumPy's
# global RNG, the stdlib ``random`` module, and Faker's class-level seed.
_SEED = 42

np.random.seed(_SEED)
random.seed(_SEED)
# Module-level Faker instance used by the generator for random event dates.
fake = Faker()
Faker.seed(_SEED)


class EnhancedTelecomDataGenerator:
    """
    Synthetic data generator for the enhanced telecom datasets.

    Produces four families of tables:
    - Device data (OS, apps, performance)
    - Competitive intelligence
    - External data (weather, events, demographics)
    - Customer journey analytics

    Every value is drawn from the module-level ``random`` / ``np.random``
    generators (plus the module-level ``fake`` Faker instance for event
    dates), so the output depends on the state of those generators and on
    the order in which the methods are called.
    """

    def __init__(self, num_customers=100000):
        """
        Set up the date window and the lookup lists the generators draw from.

        Args:
            num_customers: Number of customers the run is sized for. It is
                stored and reported in the start-up message; the generator
                methods size their output from the DataFrames passed to them.
        """
        self.num_customers = num_customers
        self.start_date = pd.to_datetime('2022-01-01')
        self.end_date = pd.to_datetime('2024-12-31')

        # Device configurations
        self.os_versions = {
            'iOS': ['16.0', '16.1', '16.2', '17.0', '17.1', '17.2'],
            'Android': ['12', '13', '14'],
        }

        self.popular_apps = [
            'WhatsApp', 'Facebook', 'Instagram', 'YouTube', 'TikTok',
            'Netflix', 'Spotify', 'Gmail', 'Google Maps', 'Twitter',
            'Snapchat', 'LinkedIn', 'Uber', 'Amazon', 'Zoom'
        ]

        # Competitor data
        self.competitors = ['Verizon', 'AT&T', 'T-Mobile', 'Sprint']

        # Weather conditions
        self.weather_conditions = ['Clear', 'Cloudy', 'Rainy', 'Stormy', 'Snowy', 'Foggy']

        # Event types
        self.event_types = ['Concert', 'Sports', 'Festival', 'Convention', 'Holiday']

        print(f"Initialized Enhanced Data Generator for {num_customers:,} customers")

    def generate_device_data(self, customers_df):
        """
        Generate one device-profile row per customer.

        Args:
            customers_df: DataFrame with at least the columns
                ``customer_id``, ``device_manufacturer`` and
                ``device_age_months``.

        Returns:
            DataFrame with one row per input row covering OS type/version,
            storage, battery, app usage, feature flags and performance
            metrics.
        """
        print("\n📱 Generating Device Performance Data...")

        device_data = []

        for _, customer in tqdm(customers_df.iterrows(), total=len(customers_df), desc="Device data"):
            # Apple devices run iOS; every other manufacturer is treated as Android.
            os_type = 'iOS' if customer['device_manufacturer'] == 'Apple' else 'Android'
            os_version = random.choice(self.os_versions[os_type])

            # App usage (select 5-12 apps per customer)
            num_apps = random.randint(5, 12)
            user_apps = random.sample(self.popular_apps, num_apps)

            # Performance metrics
            device = {
                'customer_id': customer['customer_id'],
                'os_type': os_type,
                'os_version': os_version,
                # Last update falls within the 180 days before the end of the window.
                'os_last_updated': (self.end_date - timedelta(days=random.randint(0, 180))).date(),
                'storage_total_gb': random.choice([64, 128, 256, 512]),
                'storage_used_pct': random.uniform(40, 95),
                # Battery loses 1.5 points per month of device age, floored at 70.
                'battery_health_pct': max(70, 100 - customer['device_age_months'] * 1.5),
                'avg_battery_drain_pct_per_hour': random.uniform(3, 15),
                'apps_installed': len(user_apps),
                'top_apps': ','.join(user_apps[:5]),
                'avg_daily_screen_time_hours': random.uniform(2, 8),
                'data_saver_enabled': random.random() < 0.3,
                'background_data_restricted': random.random() < 0.25,
                'wifi_calling_enabled': random.random() < 0.6,
                'volte_enabled': random.random() < 0.8,
                '5g_enabled': random.random() < 0.7,
                'device_temperature_avg_celsius': random.uniform(25, 40),
                'crash_count_last_month': np.random.poisson(1),
                'avg_app_load_time_sec': random.uniform(0.5, 3.0),
                'memory_pressure_high_pct': random.uniform(5, 40),
            }

            device_data.append(device)

        df = pd.DataFrame(device_data)
        print(f"✅ Generated device data for {len(df):,} customers")
        return df

    def generate_competitive_intelligence(self):
        """
        Generate one market-intelligence row per competitor per month.

        Months run from ``self.start_date`` to ``self.end_date`` (month
        starts). Each row is drawn independently, so values are not
        correlated across months and ``market_share_pct`` values for a given
        month are not constrained to sum to 100.

        Returns:
            DataFrame with pricing, promotion, market-share, quality, churn,
            acquisition and advertising columns.
        """
        print("\n🏢 Generating Competitive Intelligence Data...")

        months = pd.date_range(start=self.start_date, end=self.end_date, freq='MS')

        market_data = []

        for month in tqdm(months, desc="Market analysis"):
            for competitor in self.competitors:
                # Pricing data
                base_price = random.uniform(40, 120)
                promo_active = random.random() < 0.3

                market_entry = {
                    'month': month.date(),
                    'competitor': competitor,
                    'base_plan_price': round(base_price, 2),
                    # Unlimited and family plans are fixed multiples of the base price.
                    'unlimited_plan_price': round(base_price * 1.8, 2),
                    'family_plan_price': round(base_price * 2.5, 2),
                    'promotion_active': promo_active,
                    'promotion_discount_pct': random.uniform(10, 30) if promo_active else 0,
                    'market_share_pct': random.uniform(15, 30),
                    'customer_satisfaction_score': random.uniform(3.5, 4.8),
                    'network_quality_score': random.uniform(7, 9.5),
                    '5g_coverage_pct': random.uniform(40, 85),
                    'avg_download_speed_mbps': random.uniform(50, 300),
                    'churn_rate_pct': random.uniform(1.5, 3.5),
                    'new_customer_acquisitions': random.randint(50000, 200000),
                    'advertising_spend_millions': random.uniform(5, 25),
                }

                market_data.append(market_entry)

        df = pd.DataFrame(market_data)
        print(f"✅ Generated {len(df):,} market intelligence records")
        return df

    def _build_event(self, index, event_date, cities):
        """
        Build a single event record.

        The event type is drawn once and reused for the event name, so
        ``event_name`` always begins with the row's own ``event_type``.

        Args:
            index: Zero-based position of the event; the id and name use
                ``index + 1``.
            event_date: Date to store for the event.
            cities: Non-empty sequence of city names to choose from.

        Returns:
            dict with the event's id, name, type, date, city, expected
            attendance and duration in hours.
        """
        event_type = random.choice(self.event_types)
        return {
            'event_id': f'EVT{index + 1:05d}',
            'event_name': f'{event_type} {index + 1}',
            'event_type': event_type,
            'event_date': event_date,
            'city': random.choice(cities),
            'expected_attendance': random.randint(1000, 100000),
            'duration_hours': random.randint(2, 48),
        }

    def generate_external_data(self, towers_df):
        """
        Generate the external datasets: weather, local events, demographics.

        Args:
            towers_df: DataFrame with at least the columns ``tower_id`` and
                ``city``.

        Returns:
            dict with three DataFrames:
            - ``'weather'``: one row per day for the last 90 days of the
              window, for up to 200 sampled towers.
            - ``'events'``: 500 events placed in cities taken from
              ``towers_df`` (none when ``towers_df`` has no cities).
            - ``'demographics'``: one row per distinct city.
        """
        print("\n🌍 Generating External Data Sources...")

        # Distinct cities are needed by both the events and the demographics
        # steps; compute them once. A plain list keeps random.choice working
        # on an ordinary sequence instead of a NumPy array.
        cities = list(towers_df['city'].unique())

        # Weather data (daily by tower location)
        print("  Generating weather data...")
        weather_data = []
        dates = pd.date_range(start=self.end_date - timedelta(days=90), end=self.end_date, freq='D')

        # Sample subset of towers for weather
        sample_towers = towers_df.sample(n=min(200, len(towers_df)), random_state=42)

        # The DataFrame index is not used; the tower id comes from the column.
        for _, tower in tqdm(sample_towers.iterrows(), total=len(sample_towers), desc="Weather"):
            for date in dates:
                weather = {
                    'date': date.date(),
                    'tower_id': tower['tower_id'],
                    'city': tower['city'],
                    'temperature_celsius': random.uniform(-10, 35),
                    'humidity_pct': random.uniform(30, 90),
                    'precipitation_mm': max(0, np.random.exponential(2)),
                    'wind_speed_kmh': random.uniform(5, 50),
                    'condition': random.choice(self.weather_conditions),
                    'severe_weather': random.random() < 0.05,
                }
                weather_data.append(weather)

        weather_df = pd.DataFrame(weather_data)

        # Events data
        print("  Generating events data...")
        events_data = []
        # With no cities there is nowhere to place an event, so generate none
        # rather than failing on an empty choice.
        num_events = 500 if cities else 0

        for i in range(num_events):
            event_date = fake.date_between(start_date=self.start_date, end_date=self.end_date)
            events_data.append(self._build_event(i, event_date, cities))

        events_df = pd.DataFrame(events_data)

        # Demographics/Census data by city
        print("  Generating demographic data...")
        demographics_data = []

        for city in cities:
            demo = {
                'city': city,
                'population': random.randint(100000, 5000000),
                'median_age': random.uniform(30, 45),
                'median_income': random.randint(40000, 100000),
                'unemployment_rate_pct': random.uniform(3, 8),
                'college_educated_pct': random.uniform(25, 60),
                'homeownership_rate_pct': random.uniform(40, 70),
                'population_density_per_sqkm': random.randint(100, 10000),
                'urban_classification': random.choice(['Urban', 'Suburban', 'Rural']),
            }
            demographics_data.append(demo)

        demographics_df = pd.DataFrame(demographics_data)

        print(f"✅ Weather: {len(weather_df):,} records")
        print(f"✅ Events: {len(events_df):,} records")
        print(f"✅ Demographics: {len(demographics_df):,} cities")

        return {
            'weather': weather_df,
            'events': events_df,
            'demographics': demographics_df
        }

    def generate_customer_journey_data(self, customers_df):
        """
        Generate one customer-journey row per customer.

        The lifecycle stage and the engagement-score range are chosen from
        ``tenure_months``; the late-payment risk label is derived from the
        randomly drawn payment score. All other fields are independent
        random draws.

        Args:
            customers_df: DataFrame with at least the columns
                ``customer_id`` and ``tenure_months``. ``referral_count`` is
                used when present and defaults to 0 otherwise.

        Returns:
            DataFrame with one row per input row.
        """
        print("\n👤 Generating Customer Journey Data...")

        journey_data = []

        for _, customer in tqdm(customers_df.iterrows(), total=len(customers_df), desc="Customer journeys"):
            # Lifecycle stage based on tenure
            tenure = customer['tenure_months']
            if tenure < 3:
                lifecycle_stage = 'New'
                engagement_score = random.uniform(6, 9)
            elif tenure < 12:
                lifecycle_stage = 'Growing'
                engagement_score = random.uniform(7, 10)
            elif tenure < 36:
                lifecycle_stage = 'Mature'
                engagement_score = random.uniform(5, 9)
            else:
                lifecycle_stage = 'Tenured'
                engagement_score = random.uniform(4, 8)

            # Payment behavior: score > 7 is Low risk, > 4 Medium, otherwise High.
            payment_score = random.uniform(1, 10)
            late_payment_risk = 'Low' if payment_score > 7 else ('Medium' if payment_score > 4 else 'High')

            journey = {
                'customer_id': customer['customer_id'],
                'lifecycle_stage': lifecycle_stage,
                'engagement_score': round(engagement_score, 2),
                'value_segment': random.choice(['High Value', 'Medium Value', 'Low Value']),
                'loyalty_tier': random.choice(['Bronze', 'Silver', 'Gold', 'Platinum']),
                'payment_behavior_score': round(payment_score, 2),
                'late_payment_risk': late_payment_risk,
                # Expected interaction count grows with tenure (mean 0.3 per month).
                'total_interactions': np.random.poisson(tenure * 0.3),
                'positive_interactions_pct': random.uniform(60, 95),
                'nps_score': random.randint(-100, 100),
                'referrals_made': customer.get('referral_count', 0),
                'upsell_opportunities': np.random.poisson(2),
                'cross_sell_score': random.uniform(0, 10),
                'reactivation_risk': random.uniform(0, 1),
                'social_influence_score': random.uniform(0, 10),
            }

            journey_data.append(journey)

        df = pd.DataFrame(journey_data)
        print(f"✅ Generated journey data for {len(df):,} customers")
        return df


def main():
    """
    Generate every enhanced dataset and write each one as CSV.

    Base customer and tower tables come from ``TelecomDataGenerator`` in the
    ``synthetic_data_generator`` module; the enhanced tables are built from
    them by ``EnhancedTelecomDataGenerator``. Output goes to
    ``data/synthetic/``, which is created if it does not exist yet.
    """
    from pathlib import Path

    print("="*80)
    print("ENHANCED TELECOMMUNICATIONS DATA GENERATOR")
    print("="*80)

    # First generate base data using original generator
    from synthetic_data_generator import TelecomDataGenerator

    # Create the output directory before the expensive generation steps so a
    # missing or unwritable target fails early instead of at the first save.
    output_dir = Path('data/synthetic')
    output_dir.mkdir(parents=True, exist_ok=True)

    base_gen = TelecomDataGenerator(num_customers=100000, num_towers=1000)

    print("\n📊 Generating Base Data...")
    customers_df = base_gen.generate_customer_demographics()
    towers_df = base_gen.generate_network_infrastructure()

    # Initialize enhanced generator
    enhanced_gen = EnhancedTelecomDataGenerator(num_customers=len(customers_df))

    # Generate enhanced data
    device_df = enhanced_gen.generate_device_data(customers_df)
    competitive_df = enhanced_gen.generate_competitive_intelligence()
    external_data = enhanced_gen.generate_external_data(towers_df)
    journey_df = enhanced_gen.generate_customer_journey_data(customers_df)

    # Save all datasets
    print("\n💾 Saving Enhanced Datasets...")

    # File name -> table, in the order the files are written.
    outputs = {
        'device_data.csv': device_df,
        'competitive_intelligence.csv': competitive_df,
        'weather_data.csv': external_data['weather'],
        'events_data.csv': external_data['events'],
        'demographics_data.csv': external_data['demographics'],
        'customer_journey.csv': journey_df,
    }
    for filename, frame in outputs.items():
        frame.to_csv(output_dir / filename, index=False)
        print(f"  ✅ Saved {filename}")

    print("\n" + "="*80)
    print("ENHANCED DATA GENERATION COMPLETE")
    print("="*80)
    print("\n📈 Summary:")
    print(f"  - Device Data: {len(device_df):,} records")
    print(f"  - Competitive Intelligence: {len(competitive_df):,} records")
    print(f"  - Weather Data: {len(external_data['weather']):,} records")
    print(f"  - Events Data: {len(external_data['events']):,} records")
    print(f"  - Demographics: {len(external_data['demographics']):,} cities")
    print(f"  - Customer Journey: {len(journey_df):,} customers")
    print("\n✅ All enhanced datasets saved to 'data/synthetic/' directory")

# Run the generation pipeline only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    main()