"""
Model loading and prediction logic for HappySardines.

Loads the XGBoost model from Hopsworks Model Registry and makes predictions.
"""

import os
import numpy as np
import pandas as pd
from dotenv import load_dotenv

load_dotenv()

# Global model cache: populated by load_model() on its first successful load
# so that later calls return the same model object instead of fetching again.
_model = None
# Flag checked by load_model(); set to True once _model holds a loaded model.
_model_loaded = False

# Occupancy class labels with display info.
# Keyed by integer occupancy class (0-6); each value holds a short "label",
# a rider-facing "message", a "color" name and an emoji "icon".
# NOTE(review): the labels look like the GTFS-realtime OccupancyStatus levels
# - confirm against the training data before relying on that.
OCCUPANCY_LABELS = {
    class_id: {
        "label": label,
        "message": message,
        "color": color,
        "icon": icon,
    }
    for class_id, (label, message, color, icon) in enumerate(
        (
            ("Empty", "Plenty of room - pick any seat!", "green", "🟢"),
            ("Many seats available", "Lots of seats to choose from.", "green", "🟢"),
            (
                "Few seats available",
                "Some seats left - you might need to look around.",
                "yellow",
                "🟡",
            ),
            (
                "Standing room only",
                "Expect to stand - pack your patience!",
                "orange",
                "🟠",
            ),
            (
                "Crushed standing",
                "Very crowded - consider waiting for the next one.",
                "red",
                "🔴",
            ),
            ("Full", "Bus is full - you may not get on.", "red", "🔴"),
            (
                "Not accepting passengers",
                "Bus is not accepting passengers.",
                "gray",
                "⚫",
            ),
        )
    )
}

# Column order the model expects (occupancy_xgboost_model_new v4).
# Must match the training pipeline exactly - includes lat/lon bounds and
# bearing. The groups below are concatenated in order into one flat list.
FEATURE_ORDER = (
    # Trip / vehicle identifiers and movement summary
    ["trip_id", "vehicle_id", "max_speed", "n_positions"]
    # Latitude and longitude bounds
    + ["lat_min", "lat_max", "lat_mean"]
    + ["lon_min", "lon_max", "lon_mean"]
    # Bearing range
    + ["bearing_min", "bearing_max"]
    # Time
    + ["hour", "day_of_week"]
    # Weather
    + ["temperature_2m", "precipitation", "cloud_cover", "wind_speed_10m"]
    + ["rain", "snowfall"]
    # Holiday flags
    + ["is_work_free", "is_red_day", "is_day_before_holiday"]
)

# Stand-in values for the vehicle-level features, used because no real-time
# vehicle data is available at prediction time.
# These are approximate averages from the training data.
DEFAULT_VEHICLE_FEATURES = dict(
    max_speed=45.0,      # typical max speed
    n_positions=30,      # typical GPS points per trip window
    bearing_min=0.0,     # neutral bearing
    bearing_max=360.0,   # full range (stationary/unknown direction)
)


def load_model():
    """
    Return the occupancy classifier, fetching it from Hopsworks on first use.

    The first successful call logs in to Hopsworks, downloads version 4 of
    "occupancy_xgboost_model_new" from the Model Registry, loads its
    "model.json" into an XGBClassifier and stores it in the module-level
    cache. Later calls return the cached object without reconnecting.

    Reads the HOPSWORKS_API_KEY and HOPSWORKS_PROJECT environment variables.

    Returns:
        The loaded XGBClassifier instance.

    Raises:
        ValueError: If HOPSWORKS_API_KEY is unset or empty.
        Exception: Any error raised while connecting, downloading or loading
            is printed and then re-raised unchanged.
    """
    global _model, _model_loaded

    if not _model_loaded:
        # Fail fast with a clear message before attempting any connection.
        hopsworks_key = os.environ.get("HOPSWORKS_API_KEY")
        project_name = os.environ.get("HOPSWORKS_PROJECT")
        if not hopsworks_key:
            raise ValueError("HOPSWORKS_API_KEY environment variable not set. Please add it in Space settings.")

        try:
            # Imported lazily so the module can be imported without these
            # packages being touched until a model is actually needed.
            import hopsworks
            from xgboost import XGBClassifier

            print("Connecting to Hopsworks...")
            connection = hopsworks.login(project=project_name, api_key_value=hopsworks_key)
            registry = connection.get_model_registry()

            print("Fetching model from registry...")
            # Version 4 is pinned explicitly (the model trained with 23 features).
            registry_entry = registry.get_model("occupancy_xgboost_model_new", version=4)

            print(f"Downloading model version {registry_entry.version}...")
            download_path = registry_entry.download()

            print("Loading XGBoost model...")
            classifier = XGBClassifier()
            classifier.load_model(os.path.join(download_path, "model.json"))

            # Publish to the cache only after the model loaded successfully.
            _model = classifier
            _model_loaded = True
            print("Model loaded successfully!")
        except Exception as exc:
            print(f"Error loading model: {exc}")
            raise

    return _model


def predict_occupancy(lat, lon, hour, day_of_week, weather, holidays):
    """
    Predict the occupancy class for one location at one point in time.

    Vehicle-level features are filled from DEFAULT_VEHICLE_FEATURES (with
    trip_id and vehicle_id as 0 placeholders), and the lat/lon min, max and
    mean are all set to the given point.

    Args:
        lat: Latitude
        lon: Longitude
        hour: Hour of day (0-23)
        day_of_week: Day of week (0=Monday, 6=Sunday)
        weather: Dict that may contain temperature_2m, precipitation,
            cloud_cover, wind_speed_10m, rain and snowfall. Missing keys
            fall back to 10.0, 0.0, 50.0, 5.0, 0.0 and 0.0 respectively.
        holidays: Dict that may contain is_work_free, is_red_day and
            is_day_before_holiday. Missing keys count as False; values are
            converted with int().

    Returns:
        Tuple of (predicted_class, confidence, all_probabilities), where
        predicted_class is the index of the highest probability, confidence
        is that probability as a float, and all_probabilities is the full
        probability vector as a list.
    """
    classifier = load_model()

    # Vehicle features - placeholders and training-data defaults
    vehicle_part = {
        "trip_id": 0,
        "vehicle_id": 0,
        "max_speed": DEFAULT_VEHICLE_FEATURES["max_speed"],
        "n_positions": DEFAULT_VEHICLE_FEATURES["n_positions"],
        "bearing_min": DEFAULT_VEHICLE_FEATURES["bearing_min"],
        "bearing_max": DEFAULT_VEHICLE_FEATURES["bearing_max"],
    }

    # Location bounds collapse to the single requested point
    location_part = {
        "lat_min": lat,
        "lat_max": lat,
        "lat_mean": lat,
        "lon_min": lon,
        "lon_max": lon,
        "lon_mean": lon,
    }

    # Weather readings, each with its fallback for a missing key
    weather_fallbacks = (
        ("temperature_2m", 10.0),
        ("precipitation", 0.0),
        ("cloud_cover", 50.0),
        ("wind_speed_10m", 5.0),
        ("rain", 0.0),
        ("snowfall", 0.0),
    )
    weather_part = {
        name: weather.get(name, fallback) for name, fallback in weather_fallbacks
    }

    # Holiday flags (bool -> int)
    holiday_part = {
        flag: int(holidays.get(flag, False))
        for flag in ("is_work_free", "is_red_day", "is_day_before_holiday")
    }

    row = {
        **vehicle_part,
        **location_part,
        "hour": hour,
        "day_of_week": day_of_week,
        **weather_part,
        **holiday_part,
    }

    # Selecting by FEATURE_ORDER puts the columns in the order the model expects
    frame = pd.DataFrame([row])[FEATURE_ORDER]

    class_probs = classifier.predict_proba(frame)[0]
    best_class = int(np.argmax(class_probs))

    return best_class, float(class_probs[best_class]), class_probs.tolist()


def predict_occupancy_batch(locations, hour, day_of_week, weather, holidays):
    """
    Predict occupancy for multiple locations in a single batch.

    Much faster than calling predict_occupancy() in a loop: all rows go
    through one DataFrame and one predict_proba call. Every row shares the
    same time, weather, holiday and default vehicle features; only the
    lat/lon columns differ between rows.

    Args:
        locations: Iterable of (lat, lon) pairs. Any iterable is accepted,
            including one-shot generators; it is consumed exactly once.
        hour: Hour of day (0-23)
        day_of_week: Day of week (0=Monday, 6=Sunday)
        weather: Dict that may contain temperature_2m, precipitation,
            cloud_cover, wind_speed_10m, rain and snowfall. Missing keys
            fall back to 10.0, 0.0, 50.0, 5.0, 0.0 and 0.0 respectively.
        holidays: Dict that may contain is_work_free, is_red_day and
            is_day_before_holiday. Missing keys count as False; values are
            converted with int().

    Returns:
        List of (predicted_class, confidence) tuples, one per location and
        in the same order. An empty input yields an empty list without
        loading the model.
    """
    # Materialize once so a generator is not exhausted before the results
    # are paired back up with their inputs.
    locations = list(locations)
    if not locations:
        # An empty frame has no columns, so selecting FEATURE_ORDER from it
        # would raise KeyError; there is nothing to predict anyway.
        return []

    model = load_model()

    # Features that are identical for every row are computed a single time.
    shared = {
        "trip_id": 0,
        "vehicle_id": 0,
        "max_speed": DEFAULT_VEHICLE_FEATURES["max_speed"],
        "n_positions": DEFAULT_VEHICLE_FEATURES["n_positions"],
        "bearing_min": DEFAULT_VEHICLE_FEATURES["bearing_min"],
        "bearing_max": DEFAULT_VEHICLE_FEATURES["bearing_max"],
        "hour": hour,
        "day_of_week": day_of_week,
        "temperature_2m": weather.get("temperature_2m", 10.0),
        "precipitation": weather.get("precipitation", 0.0),
        "cloud_cover": weather.get("cloud_cover", 50.0),
        "wind_speed_10m": weather.get("wind_speed_10m", 5.0),
        "rain": weather.get("rain", 0.0),
        "snowfall": weather.get("snowfall", 0.0),
        "is_work_free": int(holidays.get("is_work_free", False)),
        "is_red_day": int(holidays.get("is_red_day", False)),
        "is_day_before_holiday": int(holidays.get("is_day_before_holiday", False)),
    }

    # Location bounds collapse to the single point for each row.
    rows = [
        {
            **shared,
            "lat_min": lat,
            "lat_max": lat,
            "lat_mean": lat,
            "lon_min": lon,
            "lon_max": lon,
            "lon_mean": lon,
        }
        for lat, lon in locations
    ]

    # Single DataFrame, single predict call; FEATURE_ORDER fixes column order.
    X = pd.DataFrame(rows)[FEATURE_ORDER]
    probabilities = np.asarray(model.predict_proba(X))

    # Row-wise argmax picks the most likely class for every location at once.
    best_classes = np.argmax(probabilities, axis=1)
    return [
        (int(best), float(probs[best]))
        for best, probs in zip(best_classes, probabilities)
    ]