misscp / tests /test_paper_artifacts.py
Anonymous
Initial anonymous MissCP release
32f5a65
from __future__ import annotations

import json
import math
from pathlib import Path

import pandas as pd

from sepsis_mcp.paper_artifacts import (
    build_gossis_validation_subgroup_frame,
    build_gossis_validation_table,
    build_physionet_appendix_table,
    build_physionet_gap_plot_frame,
    build_physionet_main_table,
    build_physionet_model_auroc_frame,
    main,
)
def _write_physionet_dropout_run(base_dir: Path, experiment_suffix: str, coverage_shift: float) -> None:
    """Write a synthetic ``metrics.json`` fixture into ``base_dir``.

    The directory is created if needed. The file holds a ``config`` entry and
    two experiments named ``A_to_A{experiment_suffix}`` and
    ``A_to_B{experiment_suffix}``, each with four methods. Some of the metric
    values are offset by ``coverage_shift`` so that different runs produce
    different numbers.
    """
    base_dir.mkdir(parents=True, exist_ok=True)

    def method_metrics(coverage, set_size):
        # Every method entry has the same three keys; the target is fixed at 0.9.
        return {
            "empirical_coverage": coverage,
            "average_set_size": set_size,
            "target_coverage": 0.9,
        }

    shift = coverage_shift
    a_to_a = {
        "standard": method_metrics(0.91 - shift, 0.95 - shift),
        "missingness_aware": method_metrics(0.89 + shift, 0.96),
        "weighted_missingness_aware": method_metrics(0.9, 0.94),
        "shrunk_weighted_missingness_aware": method_metrics(0.905, 0.93),
    }
    a_to_b = {
        "standard": method_metrics(0.9 - shift, 0.91 - shift),
        "missingness_aware": method_metrics(0.88 + shift, 0.915 + shift),
        "weighted_missingness_aware": method_metrics(0.885 + shift / 2, 0.9),
        "shrunk_weighted_missingness_aware": method_metrics(0.89 + shift / 3, 0.905),
    }
    payload = {
        "config": {"alpha": 0.1},
        "experiments": {
            f"A_to_A{experiment_suffix}": a_to_a,
            f"A_to_B{experiment_suffix}": a_to_b,
        },
    }

    metrics_path = base_dir / "metrics.json"
    metrics_path.write_text(json.dumps(payload), encoding="utf-8")
def _write_dropout_summary(path: Path) -> Path:
    """Write a six-row dropout summary CSV to ``path`` and return ``path``.

    The rows cover the conditions ``none``, ``drop=0.3`` and ``drop=0.5`` for
    the ``standard`` and ``missingness_aware`` methods.
    """
    columns = [
        "condition",
        "mask_rate",
        "method",
        "empirical_coverage",
        "target_coverage",
        "absolute_coverage_gap",
        "average_set_size",
    ]
    records = [
        ("none", 0.0, "standard", 0.8976, 0.9, 0.0024, 0.9126),
        ("none", 0.0, "missingness_aware", 0.8772, 0.9, 0.0228, 0.8914),
        ("drop=0.3", 0.3, "standard", 0.8847, 0.9, 0.0153, 0.8971),
        ("drop=0.3", 0.3, "missingness_aware", 0.8962, 0.9, 0.0038, 0.9104),
        ("drop=0.5", 0.5, "standard", 0.8661, 0.9, 0.0339, 0.8794),
        ("drop=0.5", 0.5, "missingness_aware", 0.8993, 0.9, 0.0007, 0.9149),
    ]
    summary = pd.DataFrame(records, columns=columns)
    summary.to_csv(path, index=False)
    return path
def _write_model_dropout_summary(path: Path) -> Path:
    """Write a per-model dropout summary CSV to ``path`` and return ``path``.

    Each of the three model types gets one row for the ``none`` condition and
    one for ``drop=0.3``, carrying ``auroc`` and ``auprc`` values.
    """
    conditions = (("none", 0.0), ("drop=0.3", 0.3))
    # (auroc, auprc) per condition, listed in the same order as ``conditions``.
    scores_by_model = {
        "xgboost": ((0.81, 0.10), (0.78, 0.09)),
        "sklearn_gbdt": ((0.79, 0.08), (0.75, 0.07)),
        "logistic_regression": ((0.72, 0.05), (0.69, 0.04)),
    }
    rows = []
    for model_type, model_scores in scores_by_model.items():
        for (condition, mask_rate), (auroc, auprc) in zip(conditions, model_scores):
            rows.append(
                {
                    "condition": condition,
                    "mask_rate": mask_rate,
                    "model_type": model_type,
                    "auroc": auroc,
                    "auprc": auprc,
                }
            )
    pd.DataFrame(rows).to_csv(path, index=False)
    return path
def _write_gossis_summary_files(base_dir: Path) -> tuple[Path, Path]:
    """Write ``overall_summary.csv`` and ``subgroup_summary.csv`` into ``base_dir``.

    The directory is created if needed. The overall file contains two
    ``standard`` rows plus one row for each of the other three methods. The
    subgroup file contains ``low``/``medium``/``high`` coverage rows for three
    methods. Returns ``(overall_path, subgroup_path)``.
    """
    base_dir.mkdir(parents=True, exist_ok=True)
    overall_path = base_dir / "overall_summary.csv"
    subgroup_path = base_dir / "subgroup_summary.csv"

    overall_columns = [
        "method",
        "min_hospital_admissions",
        "empirical_coverage",
        "average_set_size",
        "max_group_coverage_gap",
        "worst_hospital_coverage",
    ]
    overall_records = [
        ("standard", 500, 0.895, 0.95, 0.08, 0.84),
        ("standard", 500, 0.885, 0.94, 0.10, 0.83),
        ("missingness_aware", 500, 0.898, 0.96, 0.02, 0.835),
        ("weighted_missingness_aware", 500, 0.9, 0.955, 0.05, 0.845),
        ("shrunk_weighted_missingness_aware", 500, 0.897, 0.952, 0.07, 0.844),
    ]
    overall_frame = pd.DataFrame(overall_records, columns=overall_columns)
    overall_frame.to_csv(overall_path, index=False)

    group_labels = ("low", "medium", "high")
    # Coverage per method, listed in the same order as ``group_labels``.
    coverage_by_method = {
        "standard": (0.82, 0.92, 0.94),
        "missingness_aware": (0.89, 0.90, 0.91),
        "weighted_missingness_aware": (0.86, 0.91, 0.93),
    }
    subgroup_records = [
        {
            "method": method,
            "min_hospital_admissions": 500,
            "group_label": label,
            "coverage": coverage,
        }
        for method, coverages in coverage_by_method.items()
        for label, coverage in zip(group_labels, coverages)
    ]
    subgroup_frame = pd.DataFrame(subgroup_records)
    subgroup_frame.to_csv(subgroup_path, index=False)

    return overall_path, subgroup_path
def _write_gossis_summary_files_with_opt_in_methods(base_dir: Path) -> tuple[Path, Path]:
    """Write the base GOSSIS summary files, then append two extra methods.

    Calls ``_write_gossis_summary_files``, reads both CSVs back, appends rows
    for ``learned_partition`` and ``gibbs_general`` to each, and overwrites the
    files in place. Returns ``(overall_path, subgroup_path)``.
    """
    overall_path, subgroup_path = _write_gossis_summary_files(base_dir)
    # Both files are loaded before either one is rewritten.
    base_overall = pd.read_csv(overall_path)
    base_subgroup = pd.read_csv(subgroup_path)

    extra_overall = pd.DataFrame(
        [
            ("learned_partition", 500, 0.901, 0.958, 0.03, 0.84),
            ("gibbs_general", 500, 0.902, 0.959, 0.04, 0.845),
        ],
        columns=[
            "method",
            "min_hospital_admissions",
            "empirical_coverage",
            "average_set_size",
            "max_group_coverage_gap",
            "worst_hospital_coverage",
        ],
    )
    combined_overall = pd.concat([base_overall, extra_overall], ignore_index=True)
    combined_overall.to_csv(overall_path, index=False)

    # The extra methods only get a "low" subgroup row.
    extra_subgroup = pd.DataFrame(
        [
            ("learned_partition", 500, "low", 0.88),
            ("gibbs_general", 500, "low", 0.87),
        ],
        columns=["method", "min_hospital_admissions", "group_label", "coverage"],
    )
    combined_subgroup = pd.concat([base_subgroup, extra_subgroup], ignore_index=True)
    combined_subgroup.to_csv(subgroup_path, index=False)

    return overall_path, subgroup_path
def test_build_physionet_tables_and_plot_frame(tmp_path: Path) -> None:
    """Check the PhysioNet table and plot-frame builders against synthetic fixtures.

    Three metrics directories and two summary CSVs are written under
    ``tmp_path``. The test then checks the row counts, column order, and the
    method/condition/model sets of the four frames the builders return.
    """
    none_dir = tmp_path / "none"
    rd03_dir = tmp_path / "rd03"
    rd05_dir = tmp_path / "rd05"
    _write_physionet_dropout_run(none_dir, "", 0.0)
    _write_physionet_dropout_run(rd03_dir, "__random_drop", 0.02)
    _write_physionet_dropout_run(rd05_dir, "__random_drop", 0.04)
    dropout_csv = _write_dropout_summary(tmp_path / "dropout_grid_summary.csv")
    model_dropout_csv = _write_model_dropout_summary(tmp_path / "model_dropout_grid_summary.csv")

    main_table = build_physionet_main_table(none_dir, rd03_dir, rd05_dir)
    appendix_table = build_physionet_appendix_table(none_dir, rd03_dir, rd05_dir)
    stress_frame = build_physionet_gap_plot_frame(dropout_csv)
    auroc_frame = build_physionet_model_auroc_frame(model_dropout_csv)

    expected_main_columns = [
        "setting",
        "experiment",
        "condition",
        "method",
        "empirical_coverage",
        "average_set_size",
        "absolute_coverage_gap",
    ]
    expected_stress_columns = [
        "condition",
        "mask_rate",
        "method",
        "empirical_coverage",
        "target_coverage",
        "absolute_coverage_gap",
        "average_set_size",
    ]
    expected_auroc_columns = ["condition", "mask_rate", "model_type", "auroc", "auprc"]

    # Main table: 18 rows over three methods, without the shrunk variant.
    assert len(main_table) == 18
    assert list(main_table.columns) == expected_main_columns
    assert set(main_table["method"]) == {"standard", "missingness_aware", "weighted_missingness_aware"}

    # Appendix table: 24 rows, and the shrunk variant is included.
    assert len(appendix_table) == 24
    assert "shrunk_weighted_missingness_aware" in set(appendix_table["method"])

    # Stress-test frame built from the dropout summary CSV.
    assert set(stress_frame["method"]) == {"standard", "missingness_aware"}
    assert set(stress_frame["condition"]) == {"none", "drop=0.3", "drop=0.5"}
    assert list(stress_frame.columns) == expected_stress_columns
    assert (stress_frame["absolute_coverage_gap"] >= 0).all()

    # Model AUROC frame built from the per-model dropout summary CSV.
    assert set(auroc_frame["model_type"]) == {"xgboost", "sklearn_gbdt", "logistic_regression"}
    assert list(auroc_frame.columns) == expected_auroc_columns
def test_build_gossis_validation_frames(tmp_path: Path) -> None:
    """Check the GOSSIS validation table and subgroup frame builders.

    The overall fixture has two ``standard`` rows with empirical coverage
    0.895 and 0.885, and the table is expected to report a single ``standard``
    row with 0.89. That value is compared with a tolerance rather than exact
    float equality, so the test does not depend on floating-point rounding in
    how the rows are combined.
    """
    overall_path, subgroup_path = _write_gossis_summary_files(tmp_path)
    table = build_gossis_validation_table(overall_path, min_hospital_admissions=500)
    subgroup = build_gossis_validation_subgroup_frame(subgroup_path, min_hospital_admissions=500)

    assert list(table["method"]) == [
        "standard",
        "missingness_aware",
        "weighted_missingness_aware",
        "shrunk_weighted_missingness_aware",
    ]

    # ``.item()`` raises unless exactly one ``standard`` row is present.
    standard_coverage = table.loc[table["method"] == "standard", "empirical_coverage"].item()
    assert math.isclose(standard_coverage, 0.89, rel_tol=0.0, abs_tol=1e-9)

    assert set(subgroup["method"]) == {"standard", "missingness_aware", "weighted_missingness_aware"}
    assert set(subgroup["group_label"]) == {"low", "medium", "high"}
def test_build_gossis_validation_frames_ignore_opt_in_methods(tmp_path: Path) -> None:
    """Check that the GOSSIS builders leave out the two opt-in methods.

    The fixtures include ``learned_partition`` and ``gibbs_general`` rows;
    neither method may appear in the validation table or the subgroup frame.
    """
    overall_path, subgroup_path = _write_gossis_summary_files_with_opt_in_methods(tmp_path)
    table = build_gossis_validation_table(overall_path, min_hospital_admissions=500)
    subgroup = build_gossis_validation_subgroup_frame(subgroup_path, min_hospital_admissions=500)

    # Table first, then subgroup, checking each opt-in method in turn.
    for frame in (table, subgroup):
        reported_methods = set(frame["method"])
        for opt_in_method in ("learned_partition", "gibbs_general"):
            assert opt_in_method not in reported_methods
def test_external_baseline_dependency_is_documented_as_optional() -> None:
    """Check that the pinned external baseline appears in packaging metadata and README.

    ``pyproject.toml`` must contain an optional-dependencies section, the
    ``external-baselines`` name, and the ``conditionalconformal==0.0.5`` pin.
    ``README.md`` must mention the same pin.
    """
    # Find the project root by walking up from this file, so the test does not
    # depend on the directory pytest was launched from. If no ancestor holds a
    # pyproject.toml, fall back to the current working directory.
    project_root = next(
        (
            candidate
            for candidate in Path(__file__).resolve().parents
            if (candidate / "pyproject.toml").is_file()
        ),
        Path.cwd(),
    )
    pyproject_text = (project_root / "pyproject.toml").read_text(encoding="utf-8")
    readme_text = (project_root / "README.md").read_text(encoding="utf-8")

    assert "[project.optional-dependencies]" in pyproject_text
    assert "conditionalconformal==0.0.5" in pyproject_text
    assert "external-baselines" in pyproject_text
    assert "conditionalconformal==0.0.5" in readme_text
def test_main_writes_requested_paper_artifacts(tmp_path: Path) -> None:
    """Run ``main`` on synthetic inputs and check the expected output file names.

    All fixtures are written under ``tmp_path``. After ``main`` runs with
    ``--output-dir`` set to ``tmp_path / "paper"``, every expected figure,
    table and manifest file name must be present there; additional files are
    allowed.
    """
    none_dir = tmp_path / "none"
    rd03_dir = tmp_path / "rd03"
    rd05_dir = tmp_path / "rd05"
    _write_physionet_dropout_run(none_dir, "", 0.0)
    _write_physionet_dropout_run(rd03_dir, "__random_drop", 0.02)
    _write_physionet_dropout_run(rd05_dir, "__random_drop", 0.04)
    summary_path = _write_dropout_summary(tmp_path / "dropout_grid_summary.csv")
    model_summary_path = _write_model_dropout_summary(tmp_path / "model_dropout_grid_summary.csv")
    overall_path, subgroup_path = _write_gossis_summary_files(tmp_path / "gossis")
    output_dir = tmp_path / "paper"

    # Flag -> value, flattened below into the argv list in this order.
    cli_options = {
        "--physionet-none-dir": none_dir,
        "--physionet-rd03-dir": rd03_dir,
        "--physionet-rd05-dir": rd05_dir,
        "--physionet-dropout-summary-path": summary_path,
        "--physionet-model-dropout-summary-path": model_summary_path,
        "--gossis-overall-path": overall_path,
        "--gossis-subgroup-path": subgroup_path,
        "--output-dir": output_dir,
    }
    argv = []
    for flag, value in cli_options.items():
        argv.extend([flag, str(value)])
    main(argv)

    expected_files = {
        "artifact_manifest.json",
        "figure1_method_schematic.pdf",
        "figure1_method_schematic.png",
        "figure2_physionet_stress_test.pdf",
        "figure2_physionet_stress_test.png",
        "figure3_gossis_subgroup_coverage.pdf",
        "figure3_gossis_subgroup_coverage.png",
        "figureA2_physionet_model_auroc_dropout.pdf",
        "figureA2_physionet_model_auroc_dropout.png",
        "table1_physionet_main_results.csv",
        "table1_physionet_main_results.png",
        "table1_physionet_main_results.tex",
        "table2_gossis_validation_summary.csv",
        "table2_gossis_validation_summary.png",
        "table2_gossis_validation_summary.tex",
        "tableA1_physionet_full_results.csv",
        "tableA1_physionet_full_results.png",
        "tableA1_physionet_full_results.tex",
    }
    written_files = {entry.name for entry in output_dir.iterdir()}
    missing_files = expected_files - written_files
    assert not missing_files