Spaces:
Sleeping
Sleeping
| from io import StringIO | |
| from types import SimpleNamespace | |
| import joblib | |
| import pandas as pd | |
| from predicting_outcomes_in_heart_failure.config import MODELS_DIR, TARGET_COL | |
| from predicting_outcomes_in_heart_failure.modeling import evaluate | |
| import pytest | |
# Inline CSV text used as in-memory test data: one header line with 19
# columns followed by 9 data rows. Each logical CSV row is split across two
# adjacent string literals; only the second literal of each pair ends in "\n".
# The columns include one-hot style ChestPainType_*, RestingECG_* and
# ST_Slope_* flags stored as True/False.
# NOTE(review): the numeric values look already scaled/encoded, i.e. this is
# presumably output of the preprocessing step -- confirm against the pipeline.
TEST_PROCESSED_CSV = (
    "Age,Sex,RestingBP,Cholesterol,FastingBS,MaxHR,ExerciseAngina,Oldpeak,HeartDisease,"
    "ChestPainType_ASY,ChestPainType_ATA,ChestPainType_NAP,ChestPainType_TA,"
    "RestingECG_LVH,RestingECG_Normal,RestingECG_ST,ST_Slope_Down,ST_Slope_Flat,ST_Slope_Up\n"
    "-1.4322063372940435,1,0.41462668821399407,0.8574469341726604,0,1.3833394263306962,"
    "0,-0.8315022488659315,0,False,True,False,False,False,True,False,False,False,True\n"
    "-0.47805724933087407,0,1.5263596504719819,-1.183717051045972,0,0.7547357326016333,"
    "0,0.10625148648034725,1,False,False,True,False,False,True,False,False,True,False\n"
    "-1.7502560332817665,1,-0.14123979291499986,0.7450892836101669,0,-1.5239526571662194,"
    "0,-0.8315022488659315,0,False,True,False,False,False,False,True,False,False,True\n"
    "-0.5840738146601151,0,0.30345339198819526,-0.5470236978585087,0,-1.1310753485855551,"
    "1,0.5751283541534866,1,True,False,False,False,False,True,False,False,True,False\n"
    "0.05202557731533111,1,0.970493169342988,-0.9028229246397381,0,-0.5810471165726252,"
    "0,-0.8315022488659315,0,False,False,True,False,False,True,False,False,False,True\n"
    "-1.5382229026232843,1,-0.6971062740439938,1.7937606888601065,0,1.3047639646145632,"
    "0,-0.8315022488659315,0,False,False,True,False,False,True,False,False,False,True\n"
    "-0.9021235106478382,0,-0.14123979291499986,-0.11631937070228347,0,1.3047639646145632,"
    "0,-0.8315022488659315,0,False,True,False,False,False,True,False,False,False,True\n"
    "0.05202557731533111,1,-1.2529727551729877,-0.6593813484210022,0,0.2047075005887033,"
    "0,-0.8315022488659315,0,False,True,False,False,False,True,False,False,False,True\n"
    "-1.7502560332817665,1,0.41462668821399407,-0.6781076235147511,0,-0.2667452697080938,"
    "1,0.5751283541534866,1,True,False,False,False,False,True,False,False,True,False\n"
)
def processed_df():
    """Parse the inline ``TEST_PROCESSED_CSV`` text into a DataFrame.

    Returns:
        pandas.DataFrame: The result of ``pd.read_csv`` over the constant,
        using pandas' default parsing options.

    NOTE(review): this is consumed by name as a parameter elsewhere in the
    file, which suggests a pytest fixture, yet no ``@pytest.fixture``
    decorator is visible in this view -- confirm it was not lost.
    """
    csv_buffer = StringIO(TEST_PROCESSED_CSV)
    return pd.read_csv(csv_buffer)
def definition_X_test_and_y_test(processed_df):
    """Split a processed frame into a feature matrix and a target vector.

    Args:
        processed_df: DataFrame that contains the ``TARGET_COL`` column.

    Returns:
        tuple: ``(X_test, y_test)`` -- the frame with ``TARGET_COL`` dropped,
        and the ``TARGET_COL`` column on its own.

    NOTE(review): written like a pytest fixture that depends on the
    ``processed_df`` fixture; no decorator is visible here -- confirm.
    """
    target = processed_df[TARGET_COL]
    features = processed_df.drop(columns=[TARGET_COL])
    return features, target
def logreg_model():
    """Load the object persisted at ``MODELS_DIR / "all" / "logreg.joblib"``.

    Returns:
        Whatever ``joblib.load`` deserializes from that file (presumably a
        fitted logistic-regression estimator -- confirm against training).
    """
    return joblib.load(MODELS_DIR / "all" / "logreg.joblib")
def decision_tree_model():
    """Load the object persisted at ``MODELS_DIR / "all" / "decision_tree.joblib"``.

    Returns:
        Whatever ``joblib.load`` deserializes from that file (presumably a
        fitted decision-tree estimator -- confirm against training).
    """
    return joblib.load(MODELS_DIR / "all" / "decision_tree.joblib")
def random_forest_model():
    """Load the object persisted at ``MODELS_DIR / "all" / "random_forest.joblib"``.

    Returns:
        Whatever ``joblib.load`` deserializes from that file (presumably a
        fitted random-forest estimator -- confirm against training).
    """
    return joblib.load(MODELS_DIR / "all" / "random_forest.joblib")
def sample_raw_df_single():
    """Build a one-row frame of raw (un-encoded) patient features.

    Categorical fields are kept as their raw string codes (``"F"``, ``"ASY"``,
    ``"Normal"``, ``"N"``, ``"Up"``). Per the original author's note, the row
    resembles the sample used in ``main()`` and is meant for checking the
    overall preprocessing output (shape, columns, NaNs).

    Returns:
        pandas.DataFrame: A single row with 11 columns.
    """
    raw_columns = dict(
        Age=[54],
        Sex=["F"],
        ChestPainType=["ASY"],
        RestingBP=[140],
        Cholesterol=[239],
        FastingBS=[0],
        RestingECG=["Normal"],
        MaxHR=[160],
        ExerciseAngina=["N"],
        Oldpeak=[0.0],
        ST_Slope=["Up"],
    )
    return pd.DataFrame(data=raw_columns)
def sample_raw_df_two_rows():
    """Build a two-row frame of raw features whose categoricals differ by row.

    Every categorical column (``Sex``, ``ChestPainType``, ``RestingECG``,
    ``ExerciseAngina``, ``ST_Slope``) takes a different value in each row.
    Per the original author's note, it is meant for exercising the binary
    and one-hot encodings.

    Returns:
        pandas.DataFrame: Two rows with 11 columns.
    """
    raw_columns = dict(
        Age=[50, 60],
        Sex=["M", "F"],
        ChestPainType=["ASY", "NAP"],
        RestingBP=[130, 140],
        Cholesterol=[220, 250],
        FastingBS=[0, 1],
        RestingECG=["Normal", "ST"],
        MaxHR=[150, 140],
        ExerciseAngina=["Y", "N"],
        Oldpeak=[1.0, 0.0],
        ST_Slope=["Up", "Flat"],
    )
    return pd.DataFrame(data=raw_columns)
def sample_raw_df_only_asy_up():
    """Build a three-row raw frame with constant multi-class categoricals.

    In every row ``ChestPainType`` is ``"ASY"``, ``RestingECG`` is
    ``"Normal"`` and ``ST_Slope`` is ``"Up"``, so the other categories
    (ATA, NAP, TA, LVH, ST, Down, Flat) never occur in the input. Per the
    original author's note, it is meant for checking that the dummy columns
    for those absent categories are still produced and zero-filled.

    Returns:
        pandas.DataFrame: Three rows with 11 columns.
    """
    raw_columns = dict(
        Age=[50, 60, 55],
        Sex=["M", "F", "M"],
        ChestPainType=["ASY", "ASY", "ASY"],
        RestingBP=[130, 140, 135],
        Cholesterol=[220, 250, 230],
        FastingBS=[0, 1, 0],
        RestingECG=["Normal", "Normal", "Normal"],
        MaxHR=[150, 140, 145],
        ExerciseAngina=["Y", "N", "Y"],
        Oldpeak=[1.0, 0.0, 0.5],
        ST_Slope=["Up", "Up", "Up"],
    )
    return pd.DataFrame(data=raw_columns)
def dummy_logger(monkeypatch):
    """Swap ``evaluate.logger`` for an in-memory recorder and return it.

    Args:
        monkeypatch: Object exposing ``setattr(target, name, value)``
            (pytest's ``monkeypatch`` fixture).

    Returns:
        DummyLogger: The recorder now installed as ``evaluate.logger``. Its
        ``warnings``, ``infos``, ``errors`` and ``successes`` lists hold the
        messages passed to the method of the matching level, in call order.

    NOTE(review): written like a pytest fixture; no ``@pytest.fixture``
    decorator is visible in this view -- confirm it was not lost.
    """

    class DummyLogger:
        """Stores each logged message in a per-level list instead of emitting it."""

        def __init__(self):
            # One independent list per supported level.
            self.warnings, self.infos = [], []
            self.errors, self.successes = [], []

        def warning(self, msg):
            """Record a warning-level message."""
            self.warnings.append(msg)

        def info(self, msg):
            """Record an info-level message."""
            self.infos.append(msg)

        def error(self, msg):
            """Record an error-level message."""
            self.errors.append(msg)

        def success(self, msg):
            """Record a success-level message."""
            self.successes.append(msg)

    recorder = DummyLogger()
    monkeypatch.setattr(evaluate, "logger", recorder)
    return recorder
def mlflow_no_runs(monkeypatch):
    """Patch ``evaluate.mlflow`` with a stub whose run search returns nothing.

    The stub reports an experiment with id ``"exp-123"`` but returns an empty
    DataFrame from ``search_runs``. Every other entry point it defines
    (``start_run``, ``log_input``, ``log_metrics``, ``data.from_pandas``,
    ``sklearn.log_model``) fails the test through ``pytest.fail`` if called.

    Args:
        monkeypatch: Object exposing ``setattr(target, name, value)``
            (pytest's ``monkeypatch`` fixture).

    Returns:
        type: The ``DummyMlflow`` class itself (not an instance), so callers
        can read ``DummyMlflow.called_search_runs``.

    NOTE(review): written like a pytest fixture; no ``@pytest.fixture``
    decorator is visible in this view -- confirm it was not lost.
    """

    class DummyMlflow:
        """Namespace standing in for the ``mlflow`` module.

        All callables are static methods: they take no ``self`` and must
        behave the same whether reached via the class or an instance.
        """

        # Number of times ``search_runs`` has been invoked.
        called_search_runs = 0

        class data:
            """Stand-in for ``mlflow.data``."""

            @staticmethod
            def from_pandas(*args, **kwargs):
                pytest.fail(
                    "mlflow.data.from_pandas should not be called when there are no runs"
                )

        class sklearn:
            """Stand-in for ``mlflow.sklearn``."""

            @staticmethod
            def log_model(*args, **kwargs):
                pytest.fail(
                    "mlflow.sklearn.log_model should not be called when there are no runs"
                )

        @staticmethod
        def get_experiment_by_name(name):
            # Simulate an experiment that exists; its runs are empty (below).
            return SimpleNamespace(experiment_id="exp-123")

        @staticmethod
        def search_runs(experiment_ids, filter_string, order_by, max_results):
            DummyMlflow.called_search_runs += 1
            # An empty DataFrame makes ``runs.empty`` evaluate to True.
            return pd.DataFrame()

        @staticmethod
        def start_run(run_id):
            pytest.fail("mlflow.start_run should not be called when there are no runs")

        @staticmethod
        def log_input(*args, **kwargs):
            pytest.fail("mlflow.log_input should not be called when there are no runs")

        @staticmethod
        def log_metrics(*args, **kwargs):
            pytest.fail("mlflow.log_metrics should not be called when there are no runs")

    monkeypatch.setattr(evaluate, "mlflow", DummyMlflow)
    return DummyMlflow
def mlflow_experiment_missing(monkeypatch):
    """Patch ``evaluate.mlflow`` with a stub that cannot find the experiment.

    ``get_experiment_by_name`` always returns ``None``. Both it and
    ``search_runs`` count their invocations so a test can check which of
    them the code under test reached.

    Args:
        monkeypatch: Object exposing ``setattr(target, name, value)``
            (pytest's ``monkeypatch`` fixture).

    Returns:
        type: The ``DummyMlflow`` class itself (not an instance), exposing
        the ``called_get_experiment`` and ``called_search_runs`` counters.

    NOTE(review): written like a pytest fixture; no ``@pytest.fixture``
    decorator is visible in this view -- confirm it was not lost.
    """

    class DummyMlflow:
        """Namespace standing in for the ``mlflow`` module.

        The callables are static methods: they take no ``self`` and must
        behave the same whether reached via the class or an instance.
        """

        # Invocation counters, incremented by the matching method.
        called_get_experiment = 0
        called_search_runs = 0

        @staticmethod
        def get_experiment_by_name(name):
            DummyMlflow.called_get_experiment += 1
            return None  # experiment not found

        @staticmethod
        def search_runs(*args, **kwargs):
            DummyMlflow.called_search_runs += 1
            return pd.DataFrame()

    monkeypatch.setattr(evaluate, "mlflow", DummyMlflow)
    return DummyMlflow