# misscp/tests/test_experiment_log.py
# Anonymous
# Initial anonymous MissCP release
# 32f5a65
from __future__ import annotations
import json
from pathlib import Path
import pandas as pd
from sepsis_mcp import gossis_experiment, gossis_model_swap_analysis, mimic4_experiment, score_design_ablation
from sepsis_mcp.experiment_log import (
DEFAULT_EXPERIMENT_LOG_PATH,
append_gossis_output_entry,
append_mimic_output_entry,
append_non_clinical_output_entry,
append_run_entry,
append_summary_block,
should_auto_append_experiment_log,
)
def test_append_run_entry_and_summary_are_idempotent(tmp_path: Path) -> None:
    """Check that repeated appends leave one run entry and one summary block.

    Two run entries that differ only in ``title`` and two identical summary
    blocks are appended to a fresh log file. The log must then contain exactly
    one ``## Run #`` heading and exactly one ``## Smoke Summary`` heading, and
    must mention ``src/sepsis_mcp``.
    """
    log_path = tmp_path / "EXPERIMENT_LOG.md"
    result_file = tmp_path / "result.csv"
    result_file.write_text("ok\n", encoding="utf-8")

    for run_title in ("Smoke run", "Smoke run duplicate"):
        # Rebuilt on every pass so each call receives its own list objects.
        run_fields = {
            "config_command": "cmd",
            "result_file": result_file,
            "setup_note": "note",
            "core_result": "core",
            "table_headers": ["Method", "Coverage"],
            "table_rows": [["standard", "0.900"]],
            "interpretation": "interp",
            "method_takeaway": "takeaway",
        }
        append_run_entry(log_path, title=run_title, **run_fields)

    for _ in range(2):
        append_summary_block(
            log_path,
            title="Smoke Summary",
            claim_status="validated",
            bullets=["a"],
        )

    logged = log_path.read_text(encoding="utf-8")
    run_headings = logged.count("## Run #")
    summary_headings = logged.count("## Smoke Summary")
    assert run_headings == 1
    assert summary_headings == 1
    assert "src/sepsis_mcp" in logged
def test_default_root_log_is_skipped_under_pytest(monkeypatch) -> None:
    """Check the auto-append decision while ``PYTEST_CURRENT_TEST`` is set.

    With the environment variable present, the default log path must be
    rejected (``False``) while a custom path must still be accepted (``True``).
    """
    monkeypatch.setenv("PYTEST_CURRENT_TEST", "tests/test_experiment_log.py::fake")

    # (candidate path, expected decision), checked in this order.
    expectations = (
        (DEFAULT_EXPERIMENT_LOG_PATH, False),
        (Path("/tmp/custom-log.md"), True),
    )
    for candidate, expected in expectations:
        assert should_auto_append_experiment_log(candidate) is expected
def test_append_gossis_output_entry_records_classification_run(tmp_path: Path) -> None:
    """Check that a GOSSIS output directory is summarised into the log.

    The test writes ``overall_summary.csv`` (two methods) and ``metrics.json``
    (a ``config`` mapping) into a temporary directory, calls
    ``append_gossis_output_entry`` and verifies three expected phrases appear
    in the resulting log text.
    """
    output_dir = tmp_path / "gossis"
    output_dir.mkdir()

    summary_columns = [
        "method",
        "empirical_coverage",
        "max_group_coverage_gap",
        "average_set_size",
    ]
    summary_rows = [
        ("standard", 0.89, 0.05, 0.95),
        ("missingness_aware", 0.90, 0.03, 0.96),
    ]
    summary_frame = pd.DataFrame(summary_rows, columns=summary_columns)
    summary_frame.to_csv(output_dir / "overall_summary.csv", index=False)

    run_config = {
        "random_state": 3,
        "model_type": "xgboost",
        "target_column": None,
        "enable_external_baselines": False,
        "enable_learned_partition": False,
        # NOTE(review): presumably what produces the "within-group weighted CP"
        # wording asserted below -- confirm against experiment_log.
        "enable_within_group_weighted_missingness": True,
    }
    metrics_payload = json.dumps({"config": run_config})
    (output_dir / "metrics.json").write_text(metrics_payload, encoding="utf-8")

    log_path = tmp_path / "EXPERIMENT_LOG.md"
    append_gossis_output_entry(log_path, output_dir)

    logged = log_path.read_text(encoding="utf-8")
    for expected_phrase in (
        "GOSSIS classification run",
        "within-group weighted CP",
        "missingness_aware",
    ):
        assert expected_phrase in logged
def test_append_mimic_output_entry_records_leave_one_out_run(tmp_path: Path) -> None:
    """Check that a MIMIC output directory is summarised into the log.

    The test writes ``repeated_summary.csv`` (two methods) and ``config.json``
    into a temporary directory, calls ``append_mimic_output_entry`` and
    verifies the run title and the ``missingness_aware`` method name appear
    in the log text.
    """
    output_dir = tmp_path / "mimic"
    output_dir.mkdir()

    summary_columns = [
        "method",
        "empirical_coverage_mean",
        "max_group_coverage_gap_mean",
        "average_set_size_mean",
    ]
    summary_rows = [
        ("standard", 0.89, 0.06, 0.94),
        ("missingness_aware", 0.91, 0.03, 0.95),
    ]
    summary_frame = pd.DataFrame(summary_rows, columns=summary_columns)
    summary_frame.to_csv(output_dir / "repeated_summary.csv", index=False)

    config_payload = json.dumps({"model_type": "xgboost", "rotations": 3})
    (output_dir / "config.json").write_text(config_payload, encoding="utf-8")

    log_path = tmp_path / "EXPERIMENT_LOG.md"
    append_mimic_output_entry(log_path, output_dir)

    logged = log_path.read_text(encoding="utf-8")
    assert "MIMIC-IV leave-one-unit-out run" in logged
    assert "missingness_aware" in logged
def test_append_non_clinical_output_entry_records_dataset_run(tmp_path: Path) -> None:
    """Check that a non-clinical output directory is summarised into the log.

    The test writes ``summary.csv`` (two methods) and ``config.json`` (naming
    the ``airquality`` dataset) into a temporary directory, calls
    ``append_non_clinical_output_entry`` and verifies the entry title, the
    dataset name and the ``missingness_aware`` method name appear in the log.
    """
    output_dir = tmp_path / "airquality"
    output_dir.mkdir()

    summary_columns = ["method", "coverage_mean", "max_gap_mean", "set_size_mean"]
    summary_rows = [
        ("standard", 0.89, 0.06, 0.94),
        ("missingness_aware", 0.91, 0.03, 0.95),
    ]
    summary_frame = pd.DataFrame(summary_rows, columns=summary_columns)
    summary_frame.to_csv(output_dir / "summary.csv", index=False)

    config_payload = json.dumps({"dataset": "airquality", "seeds": 2})
    (output_dir / "config.json").write_text(config_payload, encoding="utf-8")

    log_path = tmp_path / "EXPERIMENT_LOG.md"
    append_non_clinical_output_entry(log_path, output_dir)

    logged = log_path.read_text(encoding="utf-8")
    for expected_phrase in ("Non-clinical experiment", "airquality", "missingness_aware"):
        assert expected_phrase in logged
def test_gossis_main_appends_experiment_log(tmp_path: Path, monkeypatch) -> None:
    """Check that the GOSSIS CLI entry point writes an experiment-log entry.

    ``run_gossis_experiment`` is replaced by a stub that only writes
    ``overall_summary.csv`` and ``metrics.json``; ``main`` is then invoked with
    an explicit ``--experiment-log-path`` and the log must contain the GOSSIS
    run title.
    """
    output_dir = tmp_path / "gossis-auto"
    log_path = tmp_path / "EXPERIMENT_LOG.md"

    def fake_run(config):
        # Stand-in for the real experiment: emits just the two artifact files
        # written below and reports the summary path.
        output_dir.mkdir(parents=True, exist_ok=True)
        summary_path = output_dir / "overall_summary.csv"
        summary_row = {
            "method": "standard",
            "empirical_coverage": 0.88,
            "max_group_coverage_gap": 0.06,
            "average_set_size": 0.94,
        }
        pd.DataFrame([summary_row]).to_csv(summary_path, index=False)

        recorded_config = {
            "random_state": config.random_state,
            "model_type": config.model_type,
        }
        (output_dir / "metrics.json").write_text(
            json.dumps({"config": recorded_config}),
            encoding="utf-8",
        )
        return {"overall_summary_path": output_dir / "overall_summary.csv"}

    monkeypatch.setattr(gossis_experiment, "run_gossis_experiment", fake_run)

    cli_args = [
        "--data-root",
        str(tmp_path),
        "--output-dir",
        str(output_dir),
        "--experiment-log-path",
        str(log_path),
    ]
    gossis_experiment.main(cli_args)

    logged = log_path.read_text(encoding="utf-8")
    assert "GOSSIS classification run" in logged
def test_mimic_main_appends_experiment_log(tmp_path: Path, monkeypatch) -> None:
    """Check that the MIMIC CLI entry point writes an experiment-log entry.

    ``run_mimic4_leave_one_out`` is replaced by a stub that only writes
    ``repeated_summary.csv``; ``main`` is then invoked with an explicit
    ``--experiment-log-path`` and the log must contain the MIMIC run title.
    """
    output_dir = tmp_path / "mimic-auto"
    log_path = tmp_path / "EXPERIMENT_LOG.md"

    def fake_run(**kwargs):
        # Stand-in for the real experiment: ignores its arguments and emits a
        # one-row summary file.
        output_dir.mkdir(parents=True, exist_ok=True)
        summary_path = output_dir / "repeated_summary.csv"
        summary_row = {
            "method": "standard",
            "empirical_coverage_mean": 0.89,
            "max_group_coverage_gap_mean": 0.06,
            "average_set_size_mean": 0.94,
        }
        pd.DataFrame([summary_row]).to_csv(summary_path, index=False)
        return {"repeated_summary": output_dir / "repeated_summary.csv"}

    monkeypatch.setattr(mimic4_experiment, "run_mimic4_leave_one_out", fake_run)

    cli_args = [
        "--csv-path",
        str(tmp_path / "mimic.csv"),
        "--output-dir",
        str(output_dir),
        "--experiment-log-path",
        str(log_path),
    ]
    mimic4_experiment.main(cli_args)

    logged = log_path.read_text(encoding="utf-8")
    assert "MIMIC-IV leave-one-unit-out run" in logged
def test_model_swap_and_score_design_mains_append_experiment_log(tmp_path: Path, monkeypatch) -> None:
    """Check that the model-swap and score-design CLIs both log to one file.

    Both experiment runners are replaced by stubs that only write their
    summary artifacts. Each ``main`` is then invoked with the same
    ``--experiment-log-path``, and the shared log must contain both entry
    titles.
    """
    swap_output = tmp_path / "swap"
    score_output = tmp_path / "score"
    log_path = tmp_path / "EXPERIMENT_LOG.md"

    def fake_swap_run(config):
        # Stand-in for the model-swap experiment: two rows that differ only in
        # grouping and mean gap.
        swap_output.mkdir(parents=True, exist_ok=True)
        aggregate_path = swap_output / "swap_aggregate.csv"
        model_pair = {
            "calibrate_model": "xgboost",
            "deploy_model": "logistic_regression",
        }
        grouping_gaps = (
            ("coverage_gap_variable", 0.05),
            ("coverage_gap_variable_within_group_weighted", 0.04),
        )
        swap_rows = [
            {**model_pair, "grouping": grouping, "mean_gap": gap}
            for grouping, gap in grouping_gaps
        ]
        pd.DataFrame(swap_rows).to_csv(aggregate_path, index=False)
        return {"swap_aggregate_path": swap_output / "swap_aggregate.csv"}

    def fake_score_run(**kwargs):
        # Stand-in for the score-design ablation: six method rows plus a
        # config file.
        score_output.mkdir(parents=True, exist_ok=True)
        summary_path = score_output / "repeated_summary.csv"
        score_columns = [
            "method",
            "empirical_coverage_mean",
            "max_group_coverage_gap_mean",
            "average_set_size_mean",
        ]
        score_rows = [
            ("original_standard", 0.89, 0.05, 0.94),
            ("original_mondrian", 0.90, 0.03, 0.95),
            ("scaled_global_missing_rate_mondrian", 0.91, 0.02, 0.96),
            ("scaled_selected_variable_standard", 0.90, 0.03, 0.95),
            ("znorm_quantile_mondrian", 0.90, 0.03, 0.95),
            ("recalibrated_top5_ks_mondrian", 0.89, 0.04, 0.95),
        ]
        pd.DataFrame(score_rows, columns=score_columns).to_csv(summary_path, index=False)

        config_payload = json.dumps({"model_type": "xgboost", "random_state_grid": [0, 1]})
        (score_output / "config.json").write_text(config_payload, encoding="utf-8")
        return {"repeated_summary_path": score_output / "repeated_summary.csv"}

    monkeypatch.setattr(gossis_model_swap_analysis, "run_model_swap_experiment", fake_swap_run)
    monkeypatch.setattr(score_design_ablation, "run_gossis_score_design_ablation", fake_score_run)

    swap_args = [
        "--data-root",
        str(tmp_path),
        "--output-dir",
        str(swap_output),
        "--experiment-log-path",
        str(log_path),
    ]
    gossis_model_swap_analysis.main(swap_args)

    score_args = [
        "gossis",
        "--data-root",
        str(tmp_path),
        "--output-dir",
        str(score_output),
        "--experiment-log-path",
        str(log_path),
    ]
    score_design_ablation.main(score_args)

    logged = log_path.read_text(encoding="utf-8")
    for expected_title in (
        "GOSSIS model-swap portability run",
        "GOSSIS score-design ablation",
    ):
        assert expected_title in logged