import os
import pytest
import joblib
import numpy as np
import pandas as pd

# Directory layout, relative to the working directory the tests run from.
MODELS_DIR = 'models'
PROCESSED_DIR = os.path.join('data', 'processed')

# Artifacts the prediction tests depend on: the final model and the scaler
# that the tests apply to the 'Amount' and 'Time' columns.
FINAL_MODEL = os.path.join(MODELS_DIR, 'xgboost_tuned.pkl')
SCALER_PATH = os.path.join(MODELS_DIR, 'scaler.pkl')

# File names (inside MODELS_DIR) of every model that must be loadable.
ALL_MODELS = [
    'logistic_regression.pkl',
    'decision_tree.pkl',
    'random_forest.pkl',
    'extra_trees.pkl',
    'adaboost.pkl',
    'gradient_boosting.pkl',
    'xgboost.pkl',
    'lightgbm.pkl',
    'catboost.pkl',
    'xgboost_tuned.pkl',
    'catboost_final.pkl',
]


def test_final_model_exists():
    """Check that the final model file (the one the API serves) is on disk."""
    model_present = os.path.exists(FINAL_MODEL)
    assert model_present, f"Final model not found: {FINAL_MODEL}"


def test_all_models_loadable():
    """Each file named in ALL_MODELS must exist and deserialize to an object.

    Files are checked in list order; the first missing or empty artifact
    fails the test.
    """
    artifacts = [(fname, os.path.join(MODELS_DIR, fname)) for fname in ALL_MODELS]
    for fname, artifact_path in artifacts:
        assert os.path.exists(artifact_path), f"Missing model file: {fname}"
        loaded = joblib.load(artifact_path)
        assert loaded is not None


def test_final_model_has_predict_proba():
    """The final model must expose ``predict_proba`` (the API needs probabilities)."""
    final_model = joblib.load(FINAL_MODEL)
    supports_proba = hasattr(final_model, 'predict_proba')
    assert supports_proba, "Model must have predict_proba method"


def test_prediction_shape():
    """Predicting on ten rows must yield ten labels and a (10, 2) probability array."""
    n_rows = 10
    scaled_cols = ['Amount', 'Time']

    model = joblib.load(FINAL_MODEL)
    scaler = joblib.load(SCALER_PATH)
    X_test = joblib.load(os.path.join(PROCESSED_DIR, 'X_test.pkl'))

    # Work on a copy so the scaled values are not written into X_test.
    batch = X_test.iloc[:n_rows].copy()
    batch[scaled_cols] = scaler.transform(batch[scaled_cols])

    labels = model.predict(batch)
    class_probs = model.predict_proba(batch)

    assert len(labels) == n_rows
    assert class_probs.shape == (n_rows, 2)


def test_prediction_values_are_binary():
    """Every predicted label on the first 50 test rows must be 0 or 1."""
    scaled_cols = ['Amount', 'Time']

    model = joblib.load(FINAL_MODEL)
    scaler = joblib.load(SCALER_PATH)
    X_test = joblib.load(os.path.join(PROCESSED_DIR, 'X_test.pkl'))

    # Work on a copy so the scaled values are not written into X_test.
    batch = X_test.iloc[:50].copy()
    batch[scaled_cols] = scaler.transform(batch[scaled_cols])

    observed = set(model.predict(batch))
    assert observed <= {0, 1}, f"Unexpected prediction values: {observed}"


def test_probabilities_between_0_and_1():
    """Second-column probabilities for the first 50 test rows must lie in [0, 1]."""
    scaled_cols = ['Amount', 'Time']

    model = joblib.load(FINAL_MODEL)
    scaler = joblib.load(SCALER_PATH)
    X_test = joblib.load(os.path.join(PROCESSED_DIR, 'X_test.pkl'))

    # Work on a copy so the scaled values are not written into X_test.
    batch = X_test.iloc[:50].copy()
    batch[scaled_cols] = scaler.transform(batch[scaled_cols])

    # Column 1 of predict_proba is the value the test range-checks.
    scores = model.predict_proba(batch)[:, 1]
    in_range = ((scores >= 0) & (scores <= 1)).all()
    assert in_range, "Probabilities out of [0,1] range"


def test_model_catches_some_fraud():
    """Sanity check: at least one true fraud row in the test set is predicted as fraud."""
    scaled_cols = ['Amount', 'Time']

    model = joblib.load(FINAL_MODEL)
    scaler = joblib.load(SCALER_PATH)
    X_test = joblib.load(os.path.join(PROCESSED_DIR, 'X_test.pkl'))
    y_test = joblib.load(os.path.join(PROCESSED_DIR, 'y_test.pkl'))

    # Scale a copy of the full test set; X_test itself is left unmodified.
    features = X_test.copy()
    features[scaled_cols] = scaler.transform(features[scaled_cols])

    flagged = model.predict(features) == 1
    actual_fraud = y_test == 1

    # Count rows that are both predicted as fraud and labelled as fraud.
    fraud_caught = (flagged & actual_fraud).sum()
    assert fraud_caught > 0, "Model caught zero fraud cases — something is wrong"