Spaces:
Sleeping
Sleeping
| import os | |
| import pytest | |
| import joblib | |
| import numpy as np | |
| import pandas as pd | |
# Locations of the trained artifacts and the processed data splits.
# Both are relative paths, so they resolve against the directory the test
# run is started from.
MODELS_DIR = "models"
PROCESSED_DIR = os.path.join("data", "processed")

# Artifacts loaded by the prediction tests below.
SCALER_PATH = os.path.join(MODELS_DIR, "scaler.pkl")
FINAL_MODEL = os.path.join(MODELS_DIR, "xgboost_tuned.pkl")

# File names (inside MODELS_DIR) of every model expected to be on disk.
ALL_MODELS = [
    "logistic_regression.pkl",
    "decision_tree.pkl",
    "random_forest.pkl",
    "extra_trees.pkl",
    "adaboost.pkl",
    "gradient_boosting.pkl",
    "xgboost.pkl",
    "lightgbm.pkl",
    "catboost.pkl",
    "xgboost_tuned.pkl",
    "catboost_final.pkl",
]
def test_final_model_exists():
    """Check that the final model file (the one the API serves) is on disk."""
    is_present = os.path.exists(FINAL_MODEL)
    assert is_present, f"Final model not found: {FINAL_MODEL}"
def test_all_models_loadable():
    """Every model listed in ALL_MODELS must exist and load without errors.

    All expected files are checked for existence first, so one failing run
    reports every missing artifact instead of only the first one hit.
    Each file is then loaded with joblib; a load error propagates as-is.
    """
    paths = {fname: os.path.join(MODELS_DIR, fname) for fname in ALL_MODELS}

    # Gather every missing file before failing: fixing artifacts one CI run
    # at a time is needlessly slow.
    missing = [fname for fname, path in paths.items() if not os.path.exists(path)]
    assert not missing, f"Missing model files: {missing}"

    for fname, path in paths.items():
        model = joblib.load(path)
        assert model is not None, f"Model loaded as None: {fname}"
def test_final_model_has_predict_proba():
    """The final model must expose predict_proba (the API needs probabilities)."""
    final_model = joblib.load(FINAL_MODEL)
    supports_proba = hasattr(final_model, 'predict_proba')
    assert supports_proba, "Model must have predict_proba method"
def test_prediction_shape():
    """Ten input rows must yield ten labels and a (10, 2) probability array."""
    n_rows = 10
    scaled_cols = ['Amount', 'Time']

    estimator = joblib.load(FINAL_MODEL)
    fitted_scaler = joblib.load(SCALER_PATH)
    features = joblib.load(os.path.join(PROCESSED_DIR, 'X_test.pkl'))

    # Work on a copy of the first rows so the loaded frame is not modified.
    batch = features.iloc[:n_rows].copy()
    batch[scaled_cols] = fitted_scaler.transform(batch[scaled_cols])

    labels = estimator.predict(batch)
    scores = estimator.predict_proba(batch)

    assert len(labels) == n_rows
    assert scores.shape == (n_rows, 2)
def test_prediction_values_are_binary():
    """Predicted labels for the first 50 test rows may only be 0 or 1."""
    scaled_cols = ['Amount', 'Time']

    estimator = joblib.load(FINAL_MODEL)
    fitted_scaler = joblib.load(SCALER_PATH)
    features = joblib.load(os.path.join(PROCESSED_DIR, 'X_test.pkl'))

    # Work on a copy of the first rows so the loaded frame is not modified.
    batch = features.iloc[:50].copy()
    batch[scaled_cols] = fitted_scaler.transform(batch[scaled_cols])

    preds = estimator.predict(batch)
    allowed = {0, 1}
    assert set(preds) <= allowed, f"Unexpected prediction values: {set(preds)}"
def test_probabilities_between_0_and_1():
    """Class-1 probabilities for the first 50 test rows must lie in [0, 1]."""
    scaled_cols = ['Amount', 'Time']

    estimator = joblib.load(FINAL_MODEL)
    fitted_scaler = joblib.load(SCALER_PATH)
    features = joblib.load(os.path.join(PROCESSED_DIR, 'X_test.pkl'))

    # Work on a copy of the first rows so the loaded frame is not modified.
    batch = features.iloc[:50].copy()
    batch[scaled_cols] = fitted_scaler.transform(batch[scaled_cols])

    # Column 1 of predict_proba is the probability of class 1.
    fraud_scores = estimator.predict_proba(batch)[:, 1]
    within_bounds = (fraud_scores >= 0) & (fraud_scores <= 1)
    assert within_bounds.all(), "Probabilities out of [0,1] range"
def test_model_catches_some_fraud():
    """Sanity check: the final model flags at least one true fraud case.

    Runs the final model over the whole saved test split, after applying the
    saved scaler to the Amount and Time columns, and counts the rows that
    are both predicted as class 1 and labelled 1.
    """
    model = joblib.load(FINAL_MODEL)
    scaler = joblib.load(SCALER_PATH)
    X_test = joblib.load(os.path.join(PROCESSED_DIR, 'X_test.pkl'))
    y_test = joblib.load(os.path.join(PROCESSED_DIR, 'y_test.pkl'))

    # Flatten the labels to a plain 1-D array so they are compared with the
    # predictions by position, whatever container they were pickled in
    # (Series, single-column DataFrame or ndarray).
    labels = np.asarray(y_test).ravel()

    # With no positive label the final check could never pass; report that
    # as a data problem instead of blaming the model.
    assert (labels == 1).any(), "Test set contains no fraud cases — cannot run sanity check"

    X = X_test.copy()
    X[['Amount', 'Time']] = scaler.transform(X[['Amount', 'Time']])
    preds = np.asarray(model.predict(X)).ravel()

    fraud_caught = int(((preds == 1) & (labels == 1)).sum())
    assert fraud_caught > 0, "Model caught zero fraud cases — something is wrong"