| from __future__ import annotations |
|
|
| import json |
| from pathlib import Path |
|
|
| import pandas as pd |
|
|
| from sepsis_mcp import gossis_experiment, gossis_model_swap_analysis, mimic4_experiment, score_design_ablation |
| from sepsis_mcp.experiment_log import ( |
| DEFAULT_EXPERIMENT_LOG_PATH, |
| append_gossis_output_entry, |
| append_mimic_output_entry, |
| append_non_clinical_output_entry, |
| append_run_entry, |
| append_summary_block, |
| should_auto_append_experiment_log, |
| ) |
|
|
|
|
def test_append_run_entry_and_summary_are_idempotent(tmp_path: Path) -> None:
    """Append a run entry and a summary block twice each and expect one of each in the log.

    The two run entries differ only in their title; the two summary blocks are
    identical. The log must end up with a single ``## Run #`` heading, a single
    ``## Smoke Summary`` heading, and the text ``src/sepsis_mcp``.
    """
    log_path = tmp_path / "EXPERIMENT_LOG.md"
    result_file = tmp_path / "result.csv"
    result_file.write_text("ok\n", encoding="utf-8")

    # The list literals live inside the loop so every call receives fresh objects.
    for run_title in ("Smoke run", "Smoke run duplicate"):
        append_run_entry(
            log_path,
            title=run_title,
            config_command="cmd",
            result_file=result_file,
            setup_note="note",
            core_result="core",
            table_headers=["Method", "Coverage"],
            table_rows=[["standard", "0.900"]],
            interpretation="interp",
            method_takeaway="takeaway",
        )

    for _ in range(2):
        append_summary_block(
            log_path,
            title="Smoke Summary",
            claim_status="validated",
            bullets=["a"],
        )

    logged = log_path.read_text(encoding="utf-8")
    run_heading_count = logged.count("## Run #")
    assert run_heading_count == 1
    summary_heading_count = logged.count("## Smoke Summary")
    assert summary_heading_count == 1
    assert "src/sepsis_mcp" in logged
|
|
|
|
def test_default_root_log_is_skipped_under_pytest(monkeypatch) -> None:
    """With ``PYTEST_CURRENT_TEST`` set, the default log path is refused and a custom path is accepted."""
    monkeypatch.setenv("PYTEST_CURRENT_TEST", "tests/test_experiment_log.py::fake")

    default_decision = should_auto_append_experiment_log(DEFAULT_EXPERIMENT_LOG_PATH)
    assert default_decision is False

    custom_log = Path("/tmp/custom-log.md")
    custom_decision = should_auto_append_experiment_log(custom_log)
    assert custom_decision is True
|
|
|
|
def test_append_gossis_output_entry_records_classification_run(tmp_path: Path) -> None:
    """Write a fake GOSSIS output directory and check the entry appended for it.

    The directory holds ``overall_summary.csv`` (two methods) and
    ``metrics.json`` (a ``config`` object). After appending, the log must
    mention the run title, the within-group weighted CP label, and the
    ``missingness_aware`` method.
    """
    output_dir = tmp_path / "gossis"
    output_dir.mkdir()

    summary_rows = [
        {
            "method": "standard",
            "empirical_coverage": 0.89,
            "max_group_coverage_gap": 0.05,
            "average_set_size": 0.95,
        },
        {
            "method": "missingness_aware",
            "empirical_coverage": 0.90,
            "max_group_coverage_gap": 0.03,
            "average_set_size": 0.96,
        },
    ]
    pd.DataFrame(summary_rows).to_csv(output_dir / "overall_summary.csv", index=False)

    run_config = {
        "random_state": 3,
        "model_type": "xgboost",
        "target_column": None,
        "enable_external_baselines": False,
        "enable_learned_partition": False,
        "enable_within_group_weighted_missingness": True,
    }
    metrics_path = output_dir / "metrics.json"
    metrics_path.write_text(json.dumps({"config": run_config}), encoding="utf-8")

    log_path = tmp_path / "EXPERIMENT_LOG.md"
    append_gossis_output_entry(log_path, output_dir)

    logged = log_path.read_text(encoding="utf-8")
    for expected in (
        "GOSSIS classification run",
        "within-group weighted CP",
        "missingness_aware",
    ):
        assert expected in logged
|
|
|
|
def test_append_mimic_output_entry_records_leave_one_out_run(tmp_path: Path) -> None:
    """Write a fake MIMIC output directory and check the entry appended for it.

    The directory holds ``repeated_summary.csv`` (two methods) and
    ``config.json``. After appending, the log must mention the run title and
    the ``missingness_aware`` method.
    """
    output_dir = tmp_path / "mimic"
    output_dir.mkdir()

    summary_rows = [
        {
            "method": "standard",
            "empirical_coverage_mean": 0.89,
            "max_group_coverage_gap_mean": 0.06,
            "average_set_size_mean": 0.94,
        },
        {
            "method": "missingness_aware",
            "empirical_coverage_mean": 0.91,
            "max_group_coverage_gap_mean": 0.03,
            "average_set_size_mean": 0.95,
        },
    ]
    pd.DataFrame(summary_rows).to_csv(output_dir / "repeated_summary.csv", index=False)

    config_payload = json.dumps({"model_type": "xgboost", "rotations": 3})
    (output_dir / "config.json").write_text(config_payload, encoding="utf-8")

    log_path = tmp_path / "EXPERIMENT_LOG.md"
    append_mimic_output_entry(log_path, output_dir)

    logged = log_path.read_text(encoding="utf-8")
    for expected in ("MIMIC-IV leave-one-unit-out run", "missingness_aware"):
        assert expected in logged
|
|
|
|
def test_append_non_clinical_output_entry_records_dataset_run(tmp_path: Path) -> None:
    """Write a fake non-clinical output directory and check the entry appended for it.

    The directory holds ``summary.csv`` (two methods) and ``config.json``
    naming the ``airquality`` dataset. After appending, the log must mention
    the entry title, the dataset name, and the ``missingness_aware`` method.
    """
    output_dir = tmp_path / "airquality"
    output_dir.mkdir()

    summary_rows = [
        {
            "method": "standard",
            "coverage_mean": 0.89,
            "max_gap_mean": 0.06,
            "set_size_mean": 0.94,
        },
        {
            "method": "missingness_aware",
            "coverage_mean": 0.91,
            "max_gap_mean": 0.03,
            "set_size_mean": 0.95,
        },
    ]
    pd.DataFrame(summary_rows).to_csv(output_dir / "summary.csv", index=False)

    config_payload = json.dumps({"dataset": "airquality", "seeds": 2})
    (output_dir / "config.json").write_text(config_payload, encoding="utf-8")

    log_path = tmp_path / "EXPERIMENT_LOG.md"
    append_non_clinical_output_entry(log_path, output_dir)

    logged = log_path.read_text(encoding="utf-8")
    for expected in ("Non-clinical experiment", "airquality", "missingness_aware"):
        assert expected in logged
|
|
|
|
def test_gossis_main_appends_experiment_log(tmp_path: Path, monkeypatch) -> None:
    """Run ``gossis_experiment.main`` with a stubbed experiment and check the log it writes.

    ``run_gossis_experiment`` is replaced by a stub that only writes
    ``overall_summary.csv`` and ``metrics.json`` into the output directory.
    The log passed via ``--experiment-log-path`` must then contain the GOSSIS
    run title.
    """
    output_dir = tmp_path / "gossis-auto"
    log_path = tmp_path / "EXPERIMENT_LOG.md"

    def stub_run_gossis(config):
        # Produce only the files this test relies on; no real experiment is run.
        output_dir.mkdir(parents=True, exist_ok=True)
        summary_path = output_dir / "overall_summary.csv"
        summary_row = {
            "method": "standard",
            "empirical_coverage": 0.88,
            "max_group_coverage_gap": 0.06,
            "average_set_size": 0.94,
        }
        pd.DataFrame([summary_row]).to_csv(summary_path, index=False)

        metrics = {
            "config": {
                "random_state": config.random_state,
                "model_type": config.model_type,
            }
        }
        (output_dir / "metrics.json").write_text(json.dumps(metrics), encoding="utf-8")
        return {"overall_summary_path": output_dir / "overall_summary.csv"}

    monkeypatch.setattr(gossis_experiment, "run_gossis_experiment", stub_run_gossis)

    cli_args = [
        "--data-root",
        str(tmp_path),
        "--output-dir",
        str(output_dir),
        "--experiment-log-path",
        str(log_path),
    ]
    gossis_experiment.main(cli_args)

    logged = log_path.read_text(encoding="utf-8")
    assert "GOSSIS classification run" in logged
|
|
|
|
def test_mimic_main_appends_experiment_log(tmp_path: Path, monkeypatch) -> None:
    """Run ``mimic4_experiment.main`` with a stubbed experiment and check the log it writes.

    ``run_mimic4_leave_one_out`` is replaced by a stub that only writes
    ``repeated_summary.csv`` into the output directory. The log passed via
    ``--experiment-log-path`` must then contain the MIMIC-IV run title.
    """
    output_dir = tmp_path / "mimic-auto"
    log_path = tmp_path / "EXPERIMENT_LOG.md"

    def stub_run_mimic(**kwargs):
        # Keyword arguments are accepted and ignored; only the summary CSV is written.
        output_dir.mkdir(parents=True, exist_ok=True)
        summary_row = {
            "method": "standard",
            "empirical_coverage_mean": 0.89,
            "max_group_coverage_gap_mean": 0.06,
            "average_set_size_mean": 0.94,
        }
        pd.DataFrame([summary_row]).to_csv(output_dir / "repeated_summary.csv", index=False)
        return {"repeated_summary": output_dir / "repeated_summary.csv"}

    monkeypatch.setattr(mimic4_experiment, "run_mimic4_leave_one_out", stub_run_mimic)

    cli_args = [
        "--csv-path",
        str(tmp_path / "mimic.csv"),
        "--output-dir",
        str(output_dir),
        "--experiment-log-path",
        str(log_path),
    ]
    mimic4_experiment.main(cli_args)

    logged = log_path.read_text(encoding="utf-8")
    assert "MIMIC-IV leave-one-unit-out run" in logged
|
|
|
|
def test_model_swap_and_score_design_mains_append_experiment_log(tmp_path: Path, monkeypatch) -> None:
    """Run the model-swap and score-design mains with stubs and check the shared log.

    Both experiment runners are replaced by stubs that only write their
    summary files. The two ``main`` entry points are then called in turn
    against one log path, which must end up containing both run titles.
    """
    swap_output = tmp_path / "swap"
    score_output = tmp_path / "score"
    log_path = tmp_path / "EXPERIMENT_LOG.md"

    def stub_swap_run(config):
        # Two aggregate rows that differ only in grouping and mean gap.
        swap_output.mkdir(parents=True, exist_ok=True)
        swap_rows = [
            {
                "calibrate_model": "xgboost",
                "deploy_model": "logistic_regression",
                "grouping": grouping,
                "mean_gap": mean_gap,
            }
            for grouping, mean_gap in (
                ("coverage_gap_variable", 0.05),
                ("coverage_gap_variable_within_group_weighted", 0.04),
            )
        ]
        pd.DataFrame(swap_rows).to_csv(swap_output / "swap_aggregate.csv", index=False)
        return {"swap_aggregate_path": swap_output / "swap_aggregate.csv"}

    def stub_score_run(**kwargs):
        # One summary row per method, followed by a small config file.
        score_output.mkdir(parents=True, exist_ok=True)
        method_stats = (
            ("original_standard", 0.89, 0.05, 0.94),
            ("original_mondrian", 0.90, 0.03, 0.95),
            ("scaled_global_missing_rate_mondrian", 0.91, 0.02, 0.96),
            ("scaled_selected_variable_standard", 0.90, 0.03, 0.95),
            ("znorm_quantile_mondrian", 0.90, 0.03, 0.95),
            ("recalibrated_top5_ks_mondrian", 0.89, 0.04, 0.95),
        )
        score_rows = [
            {
                "method": method,
                "empirical_coverage_mean": coverage,
                "max_group_coverage_gap_mean": gap,
                "average_set_size_mean": set_size,
            }
            for method, coverage, gap, set_size in method_stats
        ]
        pd.DataFrame(score_rows).to_csv(score_output / "repeated_summary.csv", index=False)

        config_payload = json.dumps({"model_type": "xgboost", "random_state_grid": [0, 1]})
        (score_output / "config.json").write_text(config_payload, encoding="utf-8")
        return {"repeated_summary_path": score_output / "repeated_summary.csv"}

    monkeypatch.setattr(gossis_model_swap_analysis, "run_model_swap_experiment", stub_swap_run)
    monkeypatch.setattr(score_design_ablation, "run_gossis_score_design_ablation", stub_score_run)

    swap_cli_args = [
        "--data-root",
        str(tmp_path),
        "--output-dir",
        str(swap_output),
        "--experiment-log-path",
        str(log_path),
    ]
    gossis_model_swap_analysis.main(swap_cli_args)

    score_cli_args = [
        "gossis",
        "--data-root",
        str(tmp_path),
        "--output-dir",
        str(score_output),
        "--experiment-log-path",
        str(log_path),
    ]
    score_design_ablation.main(score_cli_args)

    logged = log_path.read_text(encoding="utf-8")
    for expected in ("GOSSIS model-swap portability run", "GOSSIS score-design ablation"):
        assert expected in logged
|
|