File size: 4,845 Bytes
f7d11f7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
import joblib
import pandas as pd
from predicting_outcomes_in_heart_failure.config import (
    MODELS_DIR,
    PREPROCESSED_CSV,
    TARGET_COL,
)
import pytest


@pytest.fixture
def sample_features():
    """
    Provide a single-row feature frame taken from the preprocessed dataset.

    The CSV at ``PREPROCESSED_CSV`` is read, everything before positional
    row 100 is discarded, and the second remaining row is returned as a
    one-row DataFrame with the ``TARGET_COL`` column removed.
    """
    full_frame = pd.read_csv(PREPROCESSED_CSV)
    tail_rows = full_frame.iloc[100:].copy()
    # A list indexer keeps the result two-dimensional (DataFrame, not Series).
    single_row = tail_rows.iloc[[1]]
    return single_row.drop(columns=[TARGET_COL])


@pytest.fixture(scope="module")
def trained_models():
    """
    Load every saved model found in ``MODELS_DIR / "all"``.

    Only files whose suffix is ``.joblib`` are loaded. The fixture is
    module-scoped so the models are deserialized once and shared by the
    tests in this module, which only read from them.

    Returns:
        dict mapping each file's stem (file name without suffix) to the
        object returned by ``joblib.load``, in sorted path order.

    Raises:
        FileNotFoundError: if the directory does not exist (raised by
            ``iterdir``) or contains no ``.joblib`` files. Without this
            check the tests would loop over nothing and pass vacuously.
    """
    models_path = MODELS_DIR / "all"

    # iterdir() yields entries in arbitrary order; sorting keeps the model
    # order (and therefore which model fails first) reproducible across runs.
    model_files = sorted(
        entry for entry in models_path.iterdir() if entry.suffix == ".joblib"
    )
    if not model_files:
        raise FileNotFoundError(f"No .joblib model files found in {models_path}")

    return {model_file.stem: joblib.load(model_file) for model_file in model_files}


class TestModelDirectional:
    def test_one_hot_encoding_invariance(self, trained_models, sample_features):
        """
        Check how the model reacts to a change in an one hot encoded feature.
        """

        models = trained_models
        original = sample_features.copy()

        st = [c for c in sample_features.columns if c.startswith("ST_Slope_")]
        active_col = original[st].columns[original[st].iloc[0] == 1][0]
        other_col = [c for c in st if c != active_col][0]

        modified = original.copy()
        modified[active_col] = 0
        modified[other_col] = 1

        for _, model in models.items():
            pred_original = model.predict_proba(original)[0, 1]
            pred_modified = model.predict_proba(modified)[0, 1]
            assert pred_original != pred_modified

    def test_outlier_effect_on_prediction(self, trained_models, sample_features):
        """
        Check how the model reacts to an outlier.
        """

        models = trained_models
        original = sample_features.copy()

        modified = original.copy()
        modified["ExerciseAngina"] = original["ExerciseAngina"].iloc[0] + 1

        for model_name, model in models.items():
            pred_original = model.predict_proba(original)[0, 1]
            pred_modified = model.predict_proba(modified)[0, 1]

            assert pred_original != pred_modified, f"{model_name}: model not sensitive to outlier"

    def test_age_effect(self, trained_models, sample_features):
        """
        Higher age should generally be associated with increased risk.
        """

        models = trained_models

        younger = sample_features.copy()
        younger["Age"] = -1.5

        older = sample_features.copy()
        older["Age"] = 2.0

        for model_name, model in models.items():
            prob_younger = model.predict_proba(younger)[0, 1]
            prob_older = model.predict_proba(older)[0, 1]

            assert prob_older >= prob_younger, f"{model_name}: unexpected age effect"

    def test_max_heart_rate_relationship(self, trained_models, sample_features):
        """
        Lower maximum heart rate achieved should generally increase risk
        """

        models = trained_models

        high_hr = sample_features.copy()
        high_hr["MaxHR"] = 2.0

        low_hr = sample_features.copy()
        low_hr["MaxHR"] = -2.0

        for model_name, model in models.items():
            prob_high = model.predict_proba(high_hr)[0, 1]
            prob_low = model.predict_proba(low_hr)[0, 1]

            assert prob_low >= prob_high - 0.15, (
                f"{model_name}: unexpected directionality for MaxHR. "
            )

    def test_oldpeak_elevation_increases_risk(self, trained_models, sample_features):
        """
        Higher Oldpeak should increase heart disease probability
        """

        models = trained_models

        low_oldpeak = sample_features.copy()
        low_oldpeak["Oldpeak"] = -1.0

        high_oldpeak = sample_features.copy()
        high_oldpeak["Oldpeak"] = 2.0

        for model_name, model in models.items():
            prob_low = model.predict_proba(low_oldpeak)[0, 1]
            prob_high = model.predict_proba(high_oldpeak)[0, 1]

            assert prob_high >= prob_low - 0.15, (
                f"{model_name}: unexpected directionality for Oldpeak. "
            )

    def test_exercise_angina_increases_risk(self, trained_models, sample_features):
        """
        Exercise-induced angina should generally increase heart disease probability
        """

        models = trained_models

        no_angina = sample_features.copy()
        no_angina["ExerciseAngina"] = 0

        with_angina = sample_features.copy()
        with_angina["ExerciseAngina"] = 1

        for model_name, model in models.items():
            if hasattr(model, "predict_proba"):
                prob_no_angina = model.predict_proba(no_angina)[0, 1]
                prob_with_angina = model.predict_proba(with_angina)[0, 1]

                assert prob_with_angina >= prob_no_angina - 0.15, (
                    f"{model_name}: unexpected directionality for ExerciseAngina "
                )