github_sync / A4 /test_models.py
Reem
assert_test_performance_in_classification_artifact
e05be87
import pytest
import os
import pickle
import numpy as np
# regression model tests
class TestRegressionModelLoading:
def test_regression_model_file_exists(self, regression_model_path):
if not os.path.exists(regression_model_path):
pytest.skip(f"Model not found (LFS not pulled?): {regression_model_path}")
assert os.path.exists(regression_model_path)
def test_regression_artifact_is_dict(self, regression_artifact):
assert isinstance(regression_artifact, dict)
def test_regression_artifact_has_model_key(self, regression_artifact):
assert "model" in regression_artifact
def test_regression_artifact_has_feature_columns(self, regression_artifact):
assert "feature_columns" in regression_artifact
def test_regression_feature_columns_not_empty(self, regression_artifact):
assert len(regression_artifact["feature_columns"]) > 0
def test_regression_model_has_predict_method(self, regression_artifact):
model = regression_artifact["model"]
assert hasattr(model, "predict")
class TestRegressionModelPrediction:
def test_regression_prediction_returns_array(
self, regression_artifact, sample_regression_features
):
# regression model should return numpy
model = regression_artifact["model"]
prediction = model.predict(sample_regression_features)
assert isinstance(prediction, np.ndarray)
def test_regression_prediction_shape(
self, regression_artifact, sample_regression_features
):
# one value for sample
model = regression_artifact["model"]
prediction = model.predict(sample_regression_features)
assert prediction.shape[0] == len(sample_regression_features)
def test_regression_prediction_is_numeric(
self, regression_artifact, sample_regression_features
):
# should be a number
model = regression_artifact["model"]
prediction = model.predict(sample_regression_features)
assert np.issubdtype(prediction.dtype, np.number)
def test_regression_prediction_in_reasonable_range(
self, regression_artifact, sample_regression_features
):
model = regression_artifact["model"]
prediction = model.predict(sample_regression_features)[0]
# Allow some tolerance outside 0-1 for edge cases
assert -0.5 <= prediction <= 1.5
class TestClassificationModelLoading:
def test_classification_model_file_exists(self, classification_model_path):
if not os.path.exists(classification_model_path):
pytest.skip(f"Model not found (LFS not pulled?): {classification_model_path}")
assert os.path.exists(classification_model_path)
def test_classification_artifact_is_dict(self, classification_artifact):
assert isinstance(classification_artifact, dict)
def test_classification_artifact_has_model_key(self, classification_artifact):
assert "model" in classification_artifact
def test_classification_artifact_has_feature_columns(self, classification_artifact):
assert "feature_columns" in classification_artifact
def test_classification_model_has_classes(self, classification_artifact):
model = classification_artifact["model"]
assert hasattr(model, "classes_")
def test_classification_model_has_predict_method(self, classification_artifact):
model = classification_artifact["model"]
assert hasattr(model, "predict")
def test_classification_classes_match_expected(
self, classification_artifact, expected_classification_classes
):
classes = list(classification_artifact["model"].classes_)
assert sorted(classes) == sorted(expected_classification_classes)
class TestClassificationModelPrediction:
def test_classification_prediction_returns_array(
self, classification_artifact, sample_classification_features
):
model = classification_artifact["model"]
scaler = classification_artifact.get("scaler")
features = sample_classification_features
if scaler is not None:
features = scaler.transform(features)
prediction = model.predict(features)
assert isinstance(prediction, np.ndarray)
def test_classification_prediction_shape(
self, classification_artifact, sample_classification_features
):
# one class per sample
model = classification_artifact["model"]
scaler = classification_artifact.get("scaler")
features = sample_classification_features
if scaler is not None:
features = scaler.transform(features)
prediction = model.predict(features)
assert prediction.shape[0] == len(sample_classification_features)
def test_classification_prediction_is_valid_class(
self, classification_artifact, sample_classification_features,
expected_classification_classes
):
# should be a valid class
model = classification_artifact["model"]
scaler = classification_artifact.get("scaler")
features = sample_classification_features
if scaler is not None:
features = scaler.transform(features)
prediction = model.predict(features)[0]
assert prediction in expected_classification_classes
class TestModelArtifactStructure:
def test_regression_artifact_has_metrics(self, regression_artifact):
assert "test_metrics" in regression_artifact
#def test_classification_artifact_has_metrics(self, classification_artifact):
#assert "test_metrics" in classification_artifact
def test_classification_artifact_has_metrics(self, classification_artifact):
assert "test_performance" in classification_artifact # match actual key
def test_regression_metrics_has_r2(self, regression_artifact):
metrics = regression_artifact.get("test_metrics", {})
assert "r2" in metrics
def test_regression_r2_is_positive(self, regression_artifact):
metrics = regression_artifact.get("test_metrics", {})
r2 = metrics.get("r2", 0)
assert r2 > 0
class TestErrorHandling:
    """Failure-path checks: a missing model file and a mismatched feature frame."""

    def test_load_nonexistent_model_raises_error(self, repo_root):
        """Opening a model path that does not exist must raise FileNotFoundError."""
        missing_model = os.path.join(repo_root, "nonexistent_model.pkl")
        with pytest.raises(FileNotFoundError), open(missing_model, "rb") as handle:
            pickle.load(handle)

    def test_regression_model_with_wrong_features_raises(
        self, regression_artifact
    ):
        """Predicting on a frame with an unknown column must raise ValueError or KeyError."""
        import pandas as pd

        # looked up outside the raises-block so a missing "model" key is not
        # mistaken for the expected prediction error
        model = regression_artifact["model"]
        bogus_frame = pd.DataFrame({"wrong_feature": [0.5]})
        with pytest.raises((ValueError, KeyError)):
            model.predict(bogus_frame)