Spaces:
Sleeping
Sleeping
File size: 4,845 Bytes
f7d11f7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 |
import joblib
import pandas as pd
from predicting_outcomes_in_heart_failure.config import (
MODELS_DIR,
PREPROCESSED_CSV,
TARGET_COL,
)
import pytest
@pytest.fixture
def sample_features():
    """Return a one-row feature frame read from the preprocessed CSV.

    The CSV at ``PREPROCESSED_CSV`` is loaded, the rows from position 100
    onwards are kept, and the row at position 1 of that slice is returned as
    a single-row DataFrame with the ``TARGET_COL`` column dropped.
    """
    full_frame = pd.read_csv(PREPROCESSED_CSV)
    tail_rows = full_frame.iloc[100:].copy()
    # The double brackets keep the selection a DataFrame rather than a Series.
    single_row = tail_rows.iloc[[1]]
    return single_row.drop(columns=[TARGET_COL])
@pytest.fixture
def trained_models():
    """Load every saved model found under ``MODELS_DIR / "all"``.

    Returns:
        dict: maps each ``.joblib`` file's stem to the object returned by
        ``joblib.load``, in sorted path order.

    The requesting test is skipped when the directory contains no
    ``.joblib`` file, because every test that loops over an empty mapping
    would otherwise pass without checking anything.
    """
    models_path = MODELS_DIR / "all"
    # iterdir() yields entries in arbitrary order; sorting keeps the order in
    # which models are exercised (and reported on failure) reproducible.
    model_files = sorted(
        path for path in models_path.iterdir() if path.suffix == ".joblib"
    )
    if not model_files:
        pytest.skip(f"no .joblib models found in {models_path}")
    # NOTE(review): joblib.load unpickles the file contents; this assumes the
    # artifacts in this directory are produced by the project itself.
    return {path.stem: joblib.load(path) for path in model_files}
class TestModelDirectional:
def test_one_hot_encoding_invariance(self, trained_models, sample_features):
"""
Check how the model reacts to a change in an one hot encoded feature.
"""
models = trained_models
original = sample_features.copy()
st = [c for c in sample_features.columns if c.startswith("ST_Slope_")]
active_col = original[st].columns[original[st].iloc[0] == 1][0]
other_col = [c for c in st if c != active_col][0]
modified = original.copy()
modified[active_col] = 0
modified[other_col] = 1
for _, model in models.items():
pred_original = model.predict_proba(original)[0, 1]
pred_modified = model.predict_proba(modified)[0, 1]
assert pred_original != pred_modified
def test_outlier_effect_on_prediction(self, trained_models, sample_features):
"""
Check how the model reacts to an outlier.
"""
models = trained_models
original = sample_features.copy()
modified = original.copy()
modified["ExerciseAngina"] = original["ExerciseAngina"].iloc[0] + 1
for model_name, model in models.items():
pred_original = model.predict_proba(original)[0, 1]
pred_modified = model.predict_proba(modified)[0, 1]
assert pred_original != pred_modified, f"{model_name}: model not sensitive to outlier"
def test_age_effect(self, trained_models, sample_features):
"""
Higher age should generally be associated with increased risk.
"""
models = trained_models
younger = sample_features.copy()
younger["Age"] = -1.5
older = sample_features.copy()
older["Age"] = 2.0
for model_name, model in models.items():
prob_younger = model.predict_proba(younger)[0, 1]
prob_older = model.predict_proba(older)[0, 1]
assert prob_older >= prob_younger, f"{model_name}: unexpected age effect"
def test_max_heart_rate_relationship(self, trained_models, sample_features):
"""
Lower maximum heart rate achieved should generally increase risk
"""
models = trained_models
high_hr = sample_features.copy()
high_hr["MaxHR"] = 2.0
low_hr = sample_features.copy()
low_hr["MaxHR"] = -2.0
for model_name, model in models.items():
prob_high = model.predict_proba(high_hr)[0, 1]
prob_low = model.predict_proba(low_hr)[0, 1]
assert prob_low >= prob_high - 0.15, (
f"{model_name}: unexpected directionality for MaxHR. "
)
def test_oldpeak_elevation_increases_risk(self, trained_models, sample_features):
"""
Higher Oldpeak should increase heart disease probability
"""
models = trained_models
low_oldpeak = sample_features.copy()
low_oldpeak["Oldpeak"] = -1.0
high_oldpeak = sample_features.copy()
high_oldpeak["Oldpeak"] = 2.0
for model_name, model in models.items():
prob_low = model.predict_proba(low_oldpeak)[0, 1]
prob_high = model.predict_proba(high_oldpeak)[0, 1]
assert prob_high >= prob_low - 0.15, (
f"{model_name}: unexpected directionality for Oldpeak. "
)
def test_exercise_angina_increases_risk(self, trained_models, sample_features):
"""
Exercise-induced angina should generally increase heart disease probability
"""
models = trained_models
no_angina = sample_features.copy()
no_angina["ExerciseAngina"] = 0
with_angina = sample_features.copy()
with_angina["ExerciseAngina"] = 1
for model_name, model in models.items():
if hasattr(model, "predict_proba"):
prob_no_angina = model.predict_proba(no_angina)[0, 1]
prob_with_angina = model.predict_proba(with_angina)[0, 1]
assert prob_with_angina >= prob_no_angina - 0.15, (
f"{model_name}: unexpected directionality for ExerciseAngina "
)
|