| from __future__ import annotations |
|
|
| import json |
| from pathlib import Path |
|
|
| import pandas as pd |
|
|
| from sepsis_mcp.paper_artifacts import ( |
| build_physionet_appendix_table, |
| build_physionet_gap_plot_frame, |
| build_physionet_model_auroc_frame, |
| build_gossis_validation_subgroup_frame, |
| build_gossis_validation_table, |
| build_physionet_main_table, |
| main, |
| ) |
|
|
|
|
def _write_physionet_dropout_run(base_dir: Path, experiment_suffix: str, coverage_shift: float) -> None:
    """Write a synthetic ``metrics.json`` for one PhysioNet run into ``base_dir``.

    The payload has a ``config`` section and two experiments,
    ``A_to_A{experiment_suffix}`` and ``A_to_B{experiment_suffix}``. Each
    experiment holds four methods with ``empirical_coverage``,
    ``average_set_size`` and a fixed ``target_coverage`` of 0.9.
    ``coverage_shift`` offsets some of the values so that different runs
    produce different numbers. ``base_dir`` is created if it is missing.
    """
    base_dir.mkdir(parents=True, exist_ok=True)

    def _record(coverage: float, set_size: float) -> dict[str, float]:
        # Every method entry shares the same three keys and the same target.
        return {
            "empirical_coverage": coverage,
            "average_set_size": set_size,
            "target_coverage": 0.9,
        }

    shift = coverage_shift
    a_to_a = {
        "standard": _record(0.91 - shift, 0.95 - shift),
        "missingness_aware": _record(0.89 + shift, 0.96),
        "weighted_missingness_aware": _record(0.9, 0.94),
        "shrunk_weighted_missingness_aware": _record(0.905, 0.93),
    }
    a_to_b = {
        "standard": _record(0.9 - shift, 0.91 - shift),
        "missingness_aware": _record(0.88 + shift, 0.915 + shift),
        "weighted_missingness_aware": _record(0.885 + shift / 2, 0.9),
        "shrunk_weighted_missingness_aware": _record(0.89 + shift / 3, 0.905),
    }
    payload = {
        "config": {"alpha": 0.1},
        "experiments": {
            f"A_to_A{experiment_suffix}": a_to_a,
            f"A_to_B{experiment_suffix}": a_to_b,
        },
    }

    metrics_path = base_dir / "metrics.json"
    metrics_path.write_text(json.dumps(payload), encoding="utf-8")
|
|
|
|
def _write_dropout_summary(path: Path) -> Path:
    """Write a six-row dropout summary CSV to ``path`` and return ``path``.

    The rows cover the conditions ``none``, ``drop=0.3`` and ``drop=0.5``,
    each for the ``standard`` and ``missingness_aware`` methods. The frame
    index is not written to the file.
    """
    columns = (
        "condition",
        "mask_rate",
        "method",
        "empirical_coverage",
        "target_coverage",
        "absolute_coverage_gap",
        "average_set_size",
    )
    rows = (
        ("none", 0.0, "standard", 0.8976, 0.9, 0.0024, 0.9126),
        ("none", 0.0, "missingness_aware", 0.8772, 0.9, 0.0228, 0.8914),
        ("drop=0.3", 0.3, "standard", 0.8847, 0.9, 0.0153, 0.8971),
        ("drop=0.3", 0.3, "missingness_aware", 0.8962, 0.9, 0.0038, 0.9104),
        ("drop=0.5", 0.5, "standard", 0.8661, 0.9, 0.0339, 0.8794),
        ("drop=0.5", 0.5, "missingness_aware", 0.8993, 0.9, 0.0007, 0.9149),
    )
    records = [dict(zip(columns, row)) for row in rows]
    frame = pd.DataFrame(records)
    frame.to_csv(path, index=False)
    return path
|
|
|
|
def _write_model_dropout_summary(path: Path) -> Path:
    """Write a six-row per-model dropout summary CSV to ``path`` and return it.

    Three model types (``xgboost``, ``sklearn_gbdt``, ``logistic_regression``)
    each get a ``none`` row and a ``drop=0.3`` row carrying ``auroc`` and
    ``auprc`` values. The frame index is not written to the file.
    """
    columns = ("condition", "mask_rate", "model_type", "auroc", "auprc")
    rows = (
        ("none", 0.0, "xgboost", 0.81, 0.10),
        ("drop=0.3", 0.3, "xgboost", 0.78, 0.09),
        ("none", 0.0, "sklearn_gbdt", 0.79, 0.08),
        ("drop=0.3", 0.3, "sklearn_gbdt", 0.75, 0.07),
        ("none", 0.0, "logistic_regression", 0.72, 0.05),
        ("drop=0.3", 0.3, "logistic_regression", 0.69, 0.04),
    )
    records = [dict(zip(columns, row)) for row in rows]
    frame = pd.DataFrame(records)
    frame.to_csv(path, index=False)
    return path
|
|
|
|
def _write_gossis_summary_files(base_dir: Path) -> tuple[Path, Path]:
    """Write synthetic overall and subgroup summary CSVs under ``base_dir``.

    ``overall_summary.csv`` gets five rows; the ``standard`` method appears
    twice with different values. ``subgroup_summary.csv`` gets one row per
    (method, group label) pair for three methods and the labels ``low``,
    ``medium`` and ``high``. ``base_dir`` is created if it is missing.

    Returns:
        ``(overall_path, subgroup_path)``.
    """
    base_dir.mkdir(parents=True, exist_ok=True)
    overall_path = base_dir / "overall_summary.csv"
    subgroup_path = base_dir / "subgroup_summary.csv"

    overall_columns = (
        "method",
        "min_hospital_admissions",
        "empirical_coverage",
        "average_set_size",
        "max_group_coverage_gap",
        "worst_hospital_coverage",
    )
    overall_rows = (
        ("standard", 500, 0.895, 0.95, 0.08, 0.84),
        ("standard", 500, 0.885, 0.94, 0.10, 0.83),
        ("missingness_aware", 500, 0.898, 0.96, 0.02, 0.835),
        ("weighted_missingness_aware", 500, 0.9, 0.955, 0.05, 0.845),
        ("shrunk_weighted_missingness_aware", 500, 0.897, 0.952, 0.07, 0.844),
    )
    overall_records = [dict(zip(overall_columns, row)) for row in overall_rows]
    pd.DataFrame(overall_records).to_csv(overall_path, index=False)

    # Coverage per method, listed in the same order as ``group_labels``.
    group_labels = ("low", "medium", "high")
    coverage_by_method = {
        "standard": (0.82, 0.92, 0.94),
        "missingness_aware": (0.89, 0.90, 0.91),
        "weighted_missingness_aware": (0.86, 0.91, 0.93),
    }
    subgroup_records = [
        {
            "method": method,
            "min_hospital_admissions": 500,
            "group_label": label,
            "coverage": coverage,
        }
        for method, coverages in coverage_by_method.items()
        for label, coverage in zip(group_labels, coverages)
    ]
    pd.DataFrame(subgroup_records).to_csv(subgroup_path, index=False)

    return overall_path, subgroup_path
|
|
|
|
def _write_gossis_summary_files_with_opt_in_methods(base_dir: Path) -> tuple[Path, Path]:
    """Write the base summary CSVs, then append two extra methods to each.

    The files are first produced by ``_write_gossis_summary_files``. Rows for
    ``learned_partition`` and ``gibbs_general`` are then concatenated onto the
    overall file (one row each) and onto the subgroup file (one ``low`` row
    each), and both files are rewritten in place without the index.

    Returns:
        ``(overall_path, subgroup_path)``.
    """
    overall_path, subgroup_path = _write_gossis_summary_files(base_dir)

    overall_columns = (
        "method",
        "min_hospital_admissions",
        "empirical_coverage",
        "average_set_size",
        "max_group_coverage_gap",
        "worst_hospital_coverage",
    )
    extra_overall = pd.DataFrame(
        [
            dict(zip(overall_columns, row))
            for row in (
                ("learned_partition", 500, 0.901, 0.958, 0.03, 0.84),
                ("gibbs_general", 500, 0.902, 0.959, 0.04, 0.845),
            )
        ]
    )

    subgroup_columns = ("method", "min_hospital_admissions", "group_label", "coverage")
    extra_subgroup = pd.DataFrame(
        [
            dict(zip(subgroup_columns, row))
            for row in (
                ("learned_partition", 500, "low", 0.88),
                ("gibbs_general", 500, "low", 0.87),
            )
        ]
    )

    # Each file is read back, extended with its extra rows, and overwritten.
    for csv_path, extra_rows in ((overall_path, extra_overall), (subgroup_path, extra_subgroup)):
        existing = pd.read_csv(csv_path)
        combined = pd.concat([existing, extra_rows], ignore_index=True)
        combined.to_csv(csv_path, index=False)

    return overall_path, subgroup_path
|
|
|
|
def test_build_physionet_tables_and_plot_frame(tmp_path: Path) -> None:
    """Check row counts, columns and method sets of the PhysioNet frames.

    Three synthetic runs (``none``, ``rd03``, ``rd05``) and two summary CSVs
    are written under ``tmp_path`` and passed to the main-table,
    appendix-table, gap-plot and model-AUROC builders.
    """
    run_specs = (
        ("none", "", 0.0),
        ("rd03", "__random_drop", 0.02),
        ("rd05", "__random_drop", 0.04),
    )
    for dirname, suffix, shift in run_specs:
        _write_physionet_dropout_run(tmp_path / dirname, suffix, shift)
    run_dirs = [tmp_path / dirname for dirname, _, _ in run_specs]

    summary_path = _write_dropout_summary(tmp_path / "dropout_grid_summary.csv")
    model_summary_path = _write_model_dropout_summary(tmp_path / "model_dropout_grid_summary.csv")

    main_table = build_physionet_main_table(*run_dirs)
    appendix_table = build_physionet_appendix_table(*run_dirs)
    stress_frame = build_physionet_gap_plot_frame(summary_path)
    auroc_frame = build_physionet_model_auroc_frame(model_summary_path)

    # Main table: 18 rows, and only these three methods appear.
    assert len(main_table) == 18
    expected_main_columns = [
        "setting",
        "experiment",
        "condition",
        "method",
        "empirical_coverage",
        "average_set_size",
        "absolute_coverage_gap",
    ]
    assert list(main_table.columns) == expected_main_columns
    assert set(main_table["method"]) == {"standard", "missingness_aware", "weighted_missingness_aware"}

    # Appendix table: 24 rows, and the shrunk method is present.
    assert len(appendix_table) == 24
    assert "shrunk_weighted_missingness_aware" in set(appendix_table["method"])

    # Gap-plot frame.
    assert set(stress_frame["method"]) == {"standard", "missingness_aware"}
    assert set(stress_frame["condition"]) == {"none", "drop=0.3", "drop=0.5"}
    expected_stress_columns = [
        "condition",
        "mask_rate",
        "method",
        "empirical_coverage",
        "target_coverage",
        "absolute_coverage_gap",
        "average_set_size",
    ]
    assert list(stress_frame.columns) == expected_stress_columns
    assert (stress_frame["absolute_coverage_gap"] >= 0).all()

    # Model AUROC frame.
    assert set(auroc_frame["model_type"]) == {"xgboost", "sklearn_gbdt", "logistic_regression"}
    assert list(auroc_frame.columns) == ["condition", "mask_rate", "model_type", "auroc", "auprc"]
|
|
|
|
def test_build_gossis_validation_frames(tmp_path: Path) -> None:
    """Check method order, the ``standard`` coverage and subgroup contents.

    The summary CSVs are written under ``tmp_path`` and both builders are
    called with ``min_hospital_admissions=500``.
    """
    # Function-scope import so this test does not depend on the module header.
    import math

    overall_path, subgroup_path = _write_gossis_summary_files(tmp_path)

    table = build_gossis_validation_table(overall_path, min_hospital_admissions=500)
    subgroup = build_gossis_validation_subgroup_frame(subgroup_path, min_hospital_admissions=500)

    assert list(table["method"]) == [
        "standard",
        "missingness_aware",
        "weighted_missingness_aware",
        "shrunk_weighted_missingness_aware",
    ]

    # ``.item()`` raises unless exactly one "standard" row is left in the table.
    standard_coverage = table.loc[table["method"] == "standard", "empirical_coverage"].item()
    # The fixture has two "standard" rows (0.895 and 0.885) whose mean is 0.89;
    # presumably the builder averages them (confirm in paper_artifacts).
    # A computed float is compared with a tolerance because exact ``==`` can
    # break on a last-bit difference in how the aggregate is summed.
    assert math.isclose(standard_coverage, 0.89, rel_tol=0.0, abs_tol=1e-9)

    assert set(subgroup["method"]) == {"standard", "missingness_aware", "weighted_missingness_aware"}
    assert set(subgroup["group_label"]) == {"low", "medium", "high"}
|
|
|
|
def test_build_gossis_validation_frames_ignore_opt_in_methods(tmp_path: Path) -> None:
    """Check that the extra methods in the input CSVs are absent from both frames.

    The fixture files contain ``learned_partition`` and ``gibbs_general``
    rows; neither name may appear in the ``method`` column of the validation
    table or of the subgroup frame.
    """
    overall_path, subgroup_path = _write_gossis_summary_files_with_opt_in_methods(tmp_path)

    table = build_gossis_validation_table(overall_path, min_hospital_admissions=500)
    subgroup = build_gossis_validation_subgroup_frame(subgroup_path, min_hospital_admissions=500)

    opt_in_methods = {"learned_partition", "gibbs_general"}
    table_methods = set(table["method"])
    subgroup_methods = set(subgroup["method"])

    assert opt_in_methods.isdisjoint(table_methods)
    assert opt_in_methods.isdisjoint(subgroup_methods)
|
|
|
|
def test_external_baseline_dependency_is_documented_as_optional() -> None:
    """Check that pyproject.toml and README.md mention the pinned optional baseline.

    ``pyproject.toml`` must contain an optional-dependencies table, the
    ``external-baselines`` name and the ``conditionalconformal==0.0.5`` pin;
    ``README.md`` must contain the same pin.
    """
    # Use the nearest ancestor of this file that holds a pyproject.toml, so the
    # test does not depend on the directory pytest was started from. If no
    # ancestor has one, fall back to the current working directory.
    this_file = Path(__file__).resolve()
    project_root = next(
        (parent for parent in this_file.parents if (parent / "pyproject.toml").is_file()),
        Path.cwd(),
    )

    pyproject_text = (project_root / "pyproject.toml").read_text(encoding="utf-8")
    readme_text = (project_root / "README.md").read_text(encoding="utf-8")

    assert "[project.optional-dependencies]" in pyproject_text
    assert "conditionalconformal==0.0.5" in pyproject_text
    assert "external-baselines" in pyproject_text
    assert "conditionalconformal==0.0.5" in readme_text
|
|
|
|
def test_main_writes_requested_paper_artifacts(tmp_path: Path) -> None:
    """Run ``main`` on synthetic inputs and check the expected output file names.

    Every input is written under ``tmp_path``. ``main`` is called with an
    explicit argument list whose ``--output-dir`` is ``tmp_path / "paper"``,
    and each expected file name must then exist in that directory. Extra
    files are allowed.
    """
    for dirname, suffix, shift in (
        ("none", "", 0.0),
        ("rd03", "__random_drop", 0.02),
        ("rd05", "__random_drop", 0.04),
    ):
        _write_physionet_dropout_run(tmp_path / dirname, suffix, shift)
    summary_path = _write_dropout_summary(tmp_path / "dropout_grid_summary.csv")
    model_summary_path = _write_model_dropout_summary(tmp_path / "model_dropout_grid_summary.csv")
    overall_path, subgroup_path = _write_gossis_summary_files(tmp_path / "gossis")
    output_dir = tmp_path / "paper"

    # Flag/value pairs, flattened below into the argument list in this order.
    flag_values = (
        ("--physionet-none-dir", tmp_path / "none"),
        ("--physionet-rd03-dir", tmp_path / "rd03"),
        ("--physionet-rd05-dir", tmp_path / "rd05"),
        ("--physionet-dropout-summary-path", summary_path),
        ("--physionet-model-dropout-summary-path", model_summary_path),
        ("--gossis-overall-path", overall_path),
        ("--gossis-subgroup-path", subgroup_path),
        ("--output-dir", output_dir),
    )
    cli_args: list[str] = []
    for flag, value in flag_values:
        cli_args.extend((flag, str(value)))

    main(cli_args)

    expected_files = {
        "figure1_method_schematic.png",
        "figure1_method_schematic.pdf",
        "figure2_physionet_stress_test.png",
        "figure2_physionet_stress_test.pdf",
        "figureA2_physionet_model_auroc_dropout.png",
        "figureA2_physionet_model_auroc_dropout.pdf",
        "figure3_gossis_subgroup_coverage.png",
        "figure3_gossis_subgroup_coverage.pdf",
        "table1_physionet_main_results.csv",
        "table1_physionet_main_results.png",
        "table1_physionet_main_results.tex",
        "tableA1_physionet_full_results.csv",
        "tableA1_physionet_full_results.png",
        "tableA1_physionet_full_results.tex",
        "table2_gossis_validation_summary.csv",
        "table2_gossis_validation_summary.png",
        "table2_gossis_validation_summary.tex",
        "artifact_manifest.json",
    }
    written_names = {entry.name for entry in output_dir.iterdir()}
    missing = expected_files - written_names
    assert not missing
|
|