misscp / tests /test_neurips_appendix_artifacts.py
Anonymous
Initial anonymous MissCP release
32f5a65
from __future__ import annotations
import json
from pathlib import Path
import matplotlib.pyplot as plt
import pandas as pd
import pytest
def _write_dummy_png(path: Path) -> Path:
    """Save a tiny placeholder line plot to *path* and return *path*.

    Missing parent directories of *path* are created first. The figure is
    always closed, even if plotting or saving raises, so a failure does not
    leave the figure registered with pyplot.

    Args:
        path: Destination file for the rendered image.

    Returns:
        The same *path* that was passed in.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    fig, ax = plt.subplots(figsize=(1.5, 1.0))
    try:
        ax.plot([0, 1], [0, 1])
        fig.savefig(path, dpi=100)
    finally:
        # Release the figure regardless of whether savefig succeeded.
        plt.close(fig)
    return path
def test_run_appendix_artifacts_writes_bundle(tmp_path: Path) -> None:
    """Check that ``run_appendix_artifacts`` writes the expected bundle.

    Minimal CSV/PNG inputs are written into per-stage directories under
    *tmp_path*, the artifact builder is run, and the resulting figures,
    manifest, copied source tables, provenance record and traceability
    file are asserted on.
    """
    from sepsis_mcp.neurips_appendix_artifacts import run_appendix_artifacts

    # Keys double as the keyword arguments of run_appendix_artifacts.
    input_dirs = {
        "score_dir": tmp_path / "score",
        "eta_dir": tmp_path / "eta",
        "calsize_dir": tmp_path / "calsize",
        "model_swap_dir": tmp_path / "swap",
        "stress_dir": tmp_path / "stress",
    }
    bundle_dir = tmp_path / "artifacts"
    for directory in input_dirs.values():
        directory.mkdir()

    # --- score inputs -----------------------------------------------------
    score_rows = [
        {
            "dataset": "gossis",
            "model_type": "xgboost",
            "seed": 0,
            "split_kind": split_kind,
            "group": group,
            "group_label": group_label,
            "score": score,
        }
        for split_kind, group, group_label, score in (
            ("calibration", 0, "low", 0.1),
            ("calibration", 1, "high", 0.8),
            ("test", 0, "low", 0.2),
            ("test", 1, "high", 0.9),
        )
    ]
    score_frame = pd.DataFrame(score_rows)
    score_frame.to_csv(input_dirs["score_dir"] / "score_records.csv", index=False)

    # --- eta inputs -------------------------------------------------------
    eta_row = {
        "model_type": "xgboost",
        "group_label": "low",
        "mean_eta": 0.20,
        "std_eta": 0.01,
        "mean_delta": 0.05,
        "std_delta": 0.01,
        "mean_cost": 0.02,
        "advantage_rate": 1.0,
    }
    eta_frame = pd.DataFrame([eta_row])
    eta_frame.to_csv(input_dirs["eta_dir"] / "eta_aggregate.csv", index=False)

    # --- calibration-size inputs -------------------------------------------
    calsize_rows = [
        {
            "cal_size": 250,
            "cal_size_label": "250",
            "method": method,
            "coverage_mean": coverage_mean,
            "max_gap_mean": max_gap_mean,
            "set_size_mean": set_size_mean,
        }
        for method, coverage_mean, max_gap_mean, set_size_mean in (
            ("standard", 0.88, 0.06, 0.92),
            ("missingness_aware", 0.90, 0.03, 0.94),
        )
    ]
    calsize_frame = pd.DataFrame(calsize_rows)
    calsize_frame.to_csv(input_dirs["calsize_dir"] / "calsize_summary.csv", index=False)
    _write_dummy_png(input_dirs["calsize_dir"] / "calsize_panels.png")

    # --- model-swap inputs --------------------------------------------------
    swap_rows = [
        {
            "calibrate_model": calibrate_model,
            "deploy_model": "xgboost",
            "grouping": "coverage_gap_variable",
            "mean_gap": mean_gap,
            "mean_coverage": mean_coverage,
            "mean_test_group_overlap_vs_self": 1.0,
            "mean_coverage_drop_vs_self": coverage_drop,
            "mean_gap_increase_vs_self": 0.00,
        }
        for calibrate_model, mean_gap, mean_coverage, coverage_drop in (
            ("logistic_regression", 0.04, 0.90, 0.01),
            ("xgboost", 0.02, 0.91, 0.00),
        )
    ]
    swap_frame = pd.DataFrame(swap_rows)
    swap_frame.to_csv(input_dirs["model_swap_dir"] / "swap_matrix.csv", index=False)

    win_row = {
        "comparison": "coverage_gap_variable_vs_predicted_risk_tercile_frozen",
        "win_grouping": "coverage_gap_variable",
        "win_count": 3,
        "mean_gap_difference": 0.02,
    }
    win_frame = pd.DataFrame([win_row])
    win_frame.to_csv(input_dirs["model_swap_dir"] / "swap_win_summary.csv", index=False)

    # --- stress-test inputs -------------------------------------------------
    stress_rows = [
        {
            "perturbation_type": "mar_dropout",
            "perturbation_strength": strength,
            "drop_rate": strength,
            "method": method,
            "mean_gap": mean_gap,
            "std_gap": 0.0,
            "run_count": 1,
        }
        for strength, method, mean_gap in (
            (0.0, "standard", 0.03),
            (0.3, "standard", 0.06),
            (0.0, "missingness_aware", 0.02),
            (0.3, "missingness_aware", 0.03),
        )
    ]
    stress_frame = pd.DataFrame(stress_rows)
    stress_frame.to_csv(input_dirs["stress_dir"] / "stress_aggregate.csv", index=False)

    # --- run the builder ----------------------------------------------------
    paths = run_appendix_artifacts(output_dir=bundle_dir, **input_dirs)

    # Every reported figure path must exist.
    for figure_key in ("score_figure", "eta_figure", "swap_figure", "stress_figure"):
        assert Path(paths[figure_key]).exists()

    manifest_text = (bundle_dir / "manifest.json").read_text(encoding="utf-8")
    manifest = json.loads(manifest_text)
    for manifest_key in ("score_figure", "provenance"):
        assert manifest_key in manifest

    tables_dir = bundle_dir / "source_tables"
    eta_focus_path = tables_dir / "appendix_eta_focus_table.csv"
    assert (tables_dir / "score_records.csv").exists()
    assert eta_focus_path.exists()

    eta_focus = pd.read_csv(eta_focus_path)
    assert "mean_eta_minus_cost" in eta_focus.columns
    assert "mean_eta_minus_delta" not in eta_focus.columns
    # 0.20 (mean_eta) - 0.02 (mean_cost) from the single eta input row.
    assert eta_focus.loc[0, "mean_eta_minus_cost"] == pytest.approx(0.18)

    assert (bundle_dir / "appendix" / "calsize_panels.png").exists()

    provenance_text = (bundle_dir / "provenance.json").read_text(encoding="utf-8")
    provenance = json.loads(provenance_text)
    for section in ("git", "inputs", "artifacts"):
        assert section in provenance

    artifacts = provenance["artifacts"]
    assert "appendix_score_ecdfs.png" in artifacts
    score_info = artifacts["appendix_score_ecdfs.png"]
    assert score_info["derived_from"] == ["score_records.csv"]

    assert (bundle_dir / "TRACEABILITY.md").exists()