# misscp/tests/test_external_baselines.py
# Anonymous
# Initial anonymous MissCP release
# 32f5a65
from __future__ import annotations
import importlib
import numpy as np
import pandas as pd
import pytest
def test_gibbs_missingness_feature_transform_reuses_calibration_bin_edges() -> None:
    """Fit the missingness transform on calibration data, then apply it to both splits.

    Verifies the leading feature name, the number of stored bin edges for a
    two-point quantile grid, the shape of both feature matrices, and that
    column 1 of the test features carries the test frame's raw
    ``global_missing_rate`` values.
    """
    from sepsis_mcp.external_baselines import fit_gibbs_missingness_feature_transform

    rate_column = "global_missing_rate"
    calibration_probs = (0.2, 0.3, 0.7, 0.8)
    held_out_probs = (0.25, 0.75)

    calibration_frame = pd.DataFrame({rate_column: [0.10, 0.20, 0.80, 0.90]})
    test_frame = pd.DataFrame({rate_column: [0.15, 0.85]})

    # Fresh list objects are handed to every call, as in the original literals.
    transform = fit_gibbs_missingness_feature_transform(
        calibration_frame=calibration_frame,
        calibration_positive_probabilities=list(calibration_probs),
        quantile_grid=(1 / 3, 2 / 3),
    )
    calibration_features = transform.transform(
        calibration_frame,
        positive_probabilities=list(calibration_probs),
    )
    test_features = transform.transform(
        test_frame,
        positive_probabilities=list(held_out_probs),
    )

    assert transform.feature_names_[0] == "model_positive_probability"
    assert transform.bin_edges_.shape == (2,)

    # Both matrices must have one column per declared feature name.
    assert calibration_features.shape == (4, len(transform.feature_names_))
    assert test_features.shape == (2, len(transform.feature_names_))

    assert np.allclose(test_features[:, 1], [0.15, 0.85])
def test_gibbs_general_feature_transform_emits_named_columns() -> None:
    """Fit the general transform on two candidate columns and check its output layout.

    The calibration frame also contains ``global_missing_rate``, which is not
    listed as a candidate; the expected feature names are the model
    probability followed by the two candidates, giving a 3x3 feature matrix.
    """
    from sepsis_mcp.external_baselines import fit_gibbs_general_feature_transform

    probabilities = (0.2, 0.7, 0.4)
    raw_columns = {
        "age": [40, 70, 50],
        "severity": [1.0, 2.0, 1.5],
        "global_missing_rate": [0.1, 0.4, 0.2],
    }
    calibration_frame = pd.DataFrame(raw_columns)

    transform = fit_gibbs_general_feature_transform(
        calibration_frame=calibration_frame,
        calibration_positive_probabilities=list(probabilities),
        candidate_columns=["age", "severity"],
    )
    features = transform.transform(
        calibration_frame,
        positive_probabilities=list(probabilities),
    )

    expected_names = ["model_positive_probability", "age", "severity"]
    assert transform.feature_names_ == expected_names
    assert features.shape == (3, 3)
def test_gibbs_general_feature_transform_imputes_non_finite_values_from_calibration() -> None:
    """Check that NaN and infinite inputs never reach the transformed features.

    Both frames contain NaN/inf entries. After transforming, every value must
    be finite, and the non-finite test entries must come out as 50.0 for
    ``age`` and 1.5 for ``severity`` while the finite test age (55.0) is kept.
    """
    from sepsis_mcp.external_baselines import fit_gibbs_general_feature_transform

    nan, inf = np.nan, np.inf
    calibration_probs = (0.2, 0.7, 0.4)

    calibration_frame = pd.DataFrame(
        {"age": [40.0, nan, 60.0], "severity": [1.0, 2.0, inf]}
    )
    test_frame = pd.DataFrame(
        {"age": [nan, 55.0], "severity": [nan, -inf]}
    )

    transform = fit_gibbs_general_feature_transform(
        calibration_frame=calibration_frame,
        calibration_positive_probabilities=list(calibration_probs),
        candidate_columns=["age", "severity"],
    )
    calibration_features = transform.transform(
        calibration_frame,
        positive_probabilities=list(calibration_probs),
    )
    test_features = transform.transform(
        test_frame,
        positive_probabilities=[0.3, 0.6],
    )

    # Finite-ness is checked on the calibration output first, then the test output.
    for matrix in (calibration_features, test_features):
        assert np.isfinite(matrix).all()

    # Column 1 is age, column 2 is severity (column 0 holds the model probability).
    expected_by_column = ((1, [50.0, 55.0]), (2, [1.5, 1.5]))
    for column_index, expected_values in expected_by_column:
        assert np.allclose(test_features[:, column_index], expected_values)
def test_learned_partition_classifier_assigns_groups_and_predicts_sets() -> None:
    """Fit the learned-partition classifier on a one-feature toy problem.

    With a single ``signal`` feature and at most two leaves, the two test
    rows (one low signal, one high signal) must land in different groups,
    receive the singleton prediction sets ``{0}`` and ``{1}``, and the
    diagnostics must report two leaves.
    """
    from sepsis_mcp.external_baselines import LearnedPartitionConformalClassifier

    def signal_frame(values: list[float]) -> pd.DataFrame:
        # Every split uses the same single-column layout.
        return pd.DataFrame({"signal": values})

    selection_features = signal_frame([0.0, 0.1, 0.9, 1.0])
    calibration_features = signal_frame([0.05, 0.95, 0.15, 0.85])
    test_features = signal_frame([0.02, 0.98])

    unfitted = LearnedPartitionConformalClassifier(
        alpha=0.4,
        min_group_size=2,
        max_leaf_nodes=2,
        random_state=0,
    )
    # ``fit`` returns the object used for the rest of the test.
    classifier = unfitted.fit(
        selection_features=selection_features,
        selection_labels=[0, 0, 1, 1],
        selection_positive_probabilities=[0.1, 0.2, 0.8, 0.9],
        calibration_features=calibration_features,
        calibration_labels=[0, 1, 0, 1],
        calibration_positive_probabilities=[0.15, 0.85, 0.25, 0.75],
    )

    predicted_groups = classifier.predict_groups(test_features)
    prediction_sets = classifier.predict_sets(
        positive_probabilities=[0.1, 0.9],
        test_features=test_features,
    )
    diagnostics = classifier.diagnostics_summary()

    assert predicted_groups.shape == (2,)
    distinct_groups = set(predicted_groups.tolist())
    assert len(distinct_groups) == 2
    assert prediction_sets == [{0}, {1}]
    assert diagnostics["leaf_count"] == 2
def test_gibbs_classifier_raises_actionable_error_when_dependency_missing(monkeypatch: pytest.MonkeyPatch) -> None:
    """Simulate a missing ``conditionalconformal`` package and expect a named ImportError.

    ``importlib.import_module`` is patched so that only the
    ``conditionalconformal`` lookup fails; fitting the classifier must then
    raise an ``ImportError`` whose message mentions that package.
    """
    from sepsis_mcp.external_baselines import GibbsConditionalConformalClassifier

    real_import_module = importlib.import_module

    def import_module_without_condconf(name: str, package: str | None = None):
        # Every other module resolves through the genuine importer.
        if name != "conditionalconformal":
            return real_import_module(name, package)
        raise ModuleNotFoundError("No module named 'conditionalconformal'")

    monkeypatch.setattr(importlib, "import_module", import_module_without_condconf)

    classifier = GibbsConditionalConformalClassifier(alpha=0.2)
    calibration_features = np.asarray([[0.1, 0.2], [0.9, 0.8]], dtype=float)

    with pytest.raises(ImportError, match="conditionalconformal"):
        classifier.fit(
            calibration_labels=[0, 1],
            calibration_positive_probabilities=[0.1, 0.9],
            calibration_features=calibration_features,
        )
def test_gibbs_classifier_uses_finite_basis_condconf_path(monkeypatch: pytest.MonkeyPatch) -> None:
    """Check how the Gibbs classifier drives ``CondConf`` by substituting a recording fake.

    ``importlib.import_module`` is patched to return a fake module exposing
    ``FakeCondConf`` whenever ``conditionalconformal`` is requested. After
    fitting on two calibration rows and requesting thresholds for two test
    rows, the test asserts that:

    * ``CondConf`` was built with ``quantile_fn=None`` and empty
      ``infinite_params``;
    * ``predict`` was called once per test row, each time with a ``(1, 2)``
      input, quantile 0.8 (presumably ``1 - alpha`` for ``alpha=0.2``), and
      ``randomize=False, exact=False``;
    * the returned thresholds are both 0.2.
    """
    from typing import Any

    from sepsis_mcp.external_baselines import GibbsConditionalConformalClassifier

    # Values are heterogeneous (None, dict, tuple, lists that get appended to),
    # so the mapping is typed with ``Any`` rather than ``object``.
    captured: dict[str, Any] = {}

    class FakeCondConf:
        """Stand-in for ``conditionalconformal.CondConf`` that records its inputs."""

        # Parameter names are kept exactly as the original fake declared them
        # (assumed to mirror the real CondConf signature -- TODO confirm), so
        # the naming lint is suppressed instead of renaming.
        def __init__(self, score_fn, Phi_fn, quantile_fn, infinite_params):  # noqa: N803
            captured["quantile_fn"] = quantile_fn
            captured["infinite_params"] = infinite_params
            self._score_fn = score_fn
            self._phi_fn = Phi_fn

        def setup_problem(self, X, Y):  # noqa: N803
            captured["setup_shape"] = np.asarray(X).shape

        def predict(self, quantile, x_test, score_inv_fn, **kwargs):
            # Record every call so the assertions can inspect them in order.
            captured.setdefault("predict_quantiles", []).append(float(quantile))
            captured.setdefault("predict_shapes", []).append(np.asarray(x_test).shape)
            captured.setdefault("predict_kwargs", []).append(kwargs)
            return score_inv_fn(0.2, np.asarray(x_test))

    original_import_module = importlib.import_module

    def fake_import_module(name: str, package: str | None = None):
        # Only the optional dependency is intercepted; everything else is real.
        if name == "conditionalconformal":
            return type("FakeModule", (), {"CondConf": FakeCondConf})
        return original_import_module(name, package)

    monkeypatch.setattr(importlib, "import_module", fake_import_module)

    classifier = GibbsConditionalConformalClassifier(alpha=0.2)
    classifier.fit(
        calibration_labels=[0, 1],
        calibration_positive_probabilities=[0.1, 0.9],
        calibration_features=np.asarray([[0.1, 0.2], [0.9, 0.8]], dtype=float),
    )
    thresholds = classifier.thresholds_for_test_features(
        np.asarray([[0.2, 0.3], [0.8, 0.7]], dtype=float)
    )

    assert captured["quantile_fn"] is None
    assert captured["infinite_params"] == {}
    assert captured["predict_quantiles"] == [pytest.approx(0.8), pytest.approx(0.8)]
    assert captured["predict_shapes"] == [(1, 2), (1, 2)]
    assert captured["predict_kwargs"] == [
        {"randomize": False, "exact": False},
        {"randomize": False, "exact": False},
    ]
    assert np.allclose(thresholds, [0.2, 0.2])