#!/usr/bin/env python3
"""
Script to fetch and analyze data from Google Sheets

Usage: python fetch_and_analyze.py
"""

import json
import os
import sys
from collections import Counter
from datetime import datetime

import requests  # noqa: F401 — kept for side effects/back-compat; not used directly here

# Add parent directory to path so the local `data` package resolves
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from data.sheets_service import GoogleSheetsService, DataPreprocessor


def _fetch_sheet(sheets_service, spreadsheet_id, step, sheet_name, range_str):
    """Fetch one sheet with progress output.

    Returns the fetched records, or [] if the fetch fails (best-effort:
    a failure on one sheet must not abort the whole run).
    """
    print(f"\n{step}. Fetching {sheet_name} sheet...")
    try:
        data = sheets_service.fetch_sheet_data(
            spreadsheet_id=spreadsheet_id,
            sheet_name=sheet_name,
            range_str=range_str,
        )
        print(f" ✓ Fetched {len(data)} records")
        return data
    except Exception as e:
        print(f" ✗ Error: {str(e)}")
        return []


def _print_sample_features(preprocessed_data):
    """Print the first few preprocessed records' feature values."""
    print("\n4. Sample Features:")
    print("-" * 60)
    for i, features in enumerate(preprocessed_data[:3]):
        print(f"\nRecord {i + 1}:")
        print(f" Airline: {features['airline']}")
        print(f" Hub: {features['hub']}")
        print(f" Category: {features['category']}")
        print(f" Report Length: {features['report_length']} chars")
        print(f" Word Count: {features['report_word_count']}")
        print(f" Severity Score: {features['severity_keyword_count']}")
        print(f" Has Photos: {features['has_photos']}")
        print(f" Is Closed: {features['is_closed']}")


def _save_output(spreadsheet_id, non_cargo_data, cargo_data, all_data, preprocessed_data):
    """Dump metadata + a capped sample of raw/preprocessed data to a timestamped JSON file."""
    output_file = f"analysis_data_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
    print(f"\n5. Saving data to {output_file}...")
    # Explicit encoding so output is stable across platforms; default=str as a
    # deliberate catch-all serializer for non-JSON-native values (e.g. datetimes).
    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(
            {
                "metadata": {
                    "spreadsheet_id": spreadsheet_id,
                    "fetched_at": datetime.now().isoformat(),
                    "non_cargo_count": len(non_cargo_data),
                    "cargo_count": len(cargo_data),
                    "total_count": len(all_data),
                },
                "raw_data": all_data[:50],  # Save first 50 records
                "preprocessed_features": preprocessed_data[:10],
            },
            f,
            indent=2,
            default=str,
        )
    print(f" ✓ Data saved to {output_file}")


def _print_aggregate_stats(all_data):
    """Print record counts grouped by airline (top 5), hub (all), and category (top 5)."""
    print("\n6. Aggregate Statistics:")
    print("-" * 60)

    # Counter replaces the manual dict.get(k, 0) + 1 pattern; most_common()
    # yields counts in descending order (ties keep insertion order, same as
    # a stable sort over an insertion-ordered dict).
    airline_counts = Counter(r.get("Airlines", "Unknown") for r in all_data)
    hub_counts = Counter(r.get("HUB", "Unknown") for r in all_data)
    category_counts = Counter(
        r.get("Irregularity_Complain_Category", "Unknown") for r in all_data
    )

    print("\nBy Airline:")
    for airline, count in airline_counts.most_common(5):
        print(f" {airline}: {count}")

    print("\nBy Hub:")
    for hub, count in hub_counts.most_common():
        print(f" {hub}: {count}")

    print("\nBy Category:")
    for category, count in category_counts.most_common(5):
        print(f" {category}: {count}")


def fetch_and_analyze():
    """Fetch data from Google Sheets and analyze it.

    Reads the spreadsheet id from the GOOGLE_SHEET_ID environment variable
    (exits with status 1 if unset), fetches the NON CARGO and CGO sheets
    (each best-effort), preprocesses a small sample, prints sample features
    and aggregate statistics, and saves a JSON snapshot to the working
    directory. Returns None.
    """
    print("=" * 60)
    print("Gapura AI Analysis - Data Fetch & Analyze")
    print("=" * 60)

    # Initialize services
    sheets_service = GoogleSheetsService()
    preprocessor = DataPreprocessor()

    # Configuration
    spreadsheet_id = os.getenv("GOOGLE_SHEET_ID")
    if not spreadsheet_id:
        print("ERROR: GOOGLE_SHEET_ID not set in environment")
        sys.exit(1)

    print(f"\nFetching data from spreadsheet: {spreadsheet_id}")
    print("-" * 60)

    # Fetch both sheets; ranges cover the first ~100 rows including header.
    non_cargo_data = _fetch_sheet(
        sheets_service, spreadsheet_id, 1, "NON CARGO", "A1:AA100"
    )
    cargo_data = _fetch_sheet(sheets_service, spreadsheet_id, 2, "CGO", "A1:Z100")

    # Combine data
    all_data = non_cargo_data + cargo_data
    print(f"\n Total records: {len(all_data)}")

    if not all_data:
        print("\nNo data fetched. Exiting.")
        return

    # Preprocess data (first 10 only — demo-sized sample)
    print("\n3. Preprocessing data...")
    preprocessed_data = preprocessor.preprocess_batch(all_data[:10])
    print(f" ✓ Preprocessed {len(preprocessed_data)} records")

    _print_sample_features(preprocessed_data)
    _save_output(spreadsheet_id, non_cargo_data, cargo_data, all_data, preprocessed_data)
    _print_aggregate_stats(all_data)

    print("\n" + "=" * 60)
    print("Analysis complete!")
    print("=" * 60)


if __name__ == "__main__":
    fetch_and_analyze()