from __future__ import annotations import json from pathlib import Path import pandas as pd from sepsis_mcp.paper_artifacts import ( build_physionet_appendix_table, build_physionet_gap_plot_frame, build_physionet_model_auroc_frame, build_gossis_validation_subgroup_frame, build_gossis_validation_table, build_physionet_main_table, main, ) def _write_physionet_dropout_run(base_dir: Path, experiment_suffix: str, coverage_shift: float) -> None: base_dir.mkdir(parents=True, exist_ok=True) metrics = { "config": {"alpha": 0.1}, "experiments": { f"A_to_A{experiment_suffix}": { "standard": { "empirical_coverage": 0.91 - coverage_shift, "average_set_size": 0.95 - coverage_shift, "target_coverage": 0.9, }, "missingness_aware": { "empirical_coverage": 0.89 + coverage_shift, "average_set_size": 0.96, "target_coverage": 0.9, }, "weighted_missingness_aware": { "empirical_coverage": 0.9, "average_set_size": 0.94, "target_coverage": 0.9, }, "shrunk_weighted_missingness_aware": { "empirical_coverage": 0.905, "average_set_size": 0.93, "target_coverage": 0.9, }, }, f"A_to_B{experiment_suffix}": { "standard": { "empirical_coverage": 0.9 - coverage_shift, "average_set_size": 0.91 - coverage_shift, "target_coverage": 0.9, }, "missingness_aware": { "empirical_coverage": 0.88 + coverage_shift, "average_set_size": 0.915 + coverage_shift, "target_coverage": 0.9, }, "weighted_missingness_aware": { "empirical_coverage": 0.885 + coverage_shift / 2, "average_set_size": 0.9, "target_coverage": 0.9, }, "shrunk_weighted_missingness_aware": { "empirical_coverage": 0.89 + coverage_shift / 3, "average_set_size": 0.905, "target_coverage": 0.9, }, }, }, } (base_dir / "metrics.json").write_text(json.dumps(metrics), encoding="utf-8") def _write_dropout_summary(path: Path) -> Path: pd.DataFrame( [ {"condition": "none", "mask_rate": 0.0, "method": "standard", "empirical_coverage": 0.8976, "target_coverage": 0.9, "absolute_coverage_gap": 0.0024, "average_set_size": 0.9126}, {"condition": "none", "mask_rate": 0.0, "method": "missingness_aware", "empirical_coverage": 0.8772, "target_coverage": 0.9, "absolute_coverage_gap": 0.0228, "average_set_size": 0.8914}, {"condition": "drop=0.3", "mask_rate": 0.3, "method": "standard", "empirical_coverage": 0.8847, "target_coverage": 0.9, "absolute_coverage_gap": 0.0153, "average_set_size": 0.8971}, {"condition": "drop=0.3", "mask_rate": 0.3, "method": "missingness_aware", "empirical_coverage": 0.8962, "target_coverage": 0.9, "absolute_coverage_gap": 0.0038, "average_set_size": 0.9104}, {"condition": "drop=0.5", "mask_rate": 0.5, "method": "standard", "empirical_coverage": 0.8661, "target_coverage": 0.9, "absolute_coverage_gap": 0.0339, "average_set_size": 0.8794}, {"condition": "drop=0.5", "mask_rate": 0.5, "method": "missingness_aware", "empirical_coverage": 0.8993, "target_coverage": 0.9, "absolute_coverage_gap": 0.0007, "average_set_size": 0.9149}, ] ).to_csv(path, index=False) return path def _write_model_dropout_summary(path: Path) -> Path: pd.DataFrame( [ {"condition": "none", "mask_rate": 0.0, "model_type": "xgboost", "auroc": 0.81, "auprc": 0.10}, {"condition": "drop=0.3", "mask_rate": 0.3, "model_type": "xgboost", "auroc": 0.78, "auprc": 0.09}, {"condition": "none", "mask_rate": 0.0, "model_type": "sklearn_gbdt", "auroc": 0.79, "auprc": 0.08}, {"condition": "drop=0.3", "mask_rate": 0.3, "model_type": "sklearn_gbdt", "auroc": 0.75, "auprc": 0.07}, {"condition": "none", "mask_rate": 0.0, "model_type": "logistic_regression", "auroc": 0.72, "auprc": 0.05}, {"condition": "drop=0.3", "mask_rate": 0.3, "model_type": "logistic_regression", "auroc": 0.69, "auprc": 0.04}, ] ).to_csv(path, index=False) return path def _write_gossis_summary_files(base_dir: Path) -> tuple[Path, Path]: base_dir.mkdir(parents=True, exist_ok=True) overall_path = base_dir / "overall_summary.csv" subgroup_path = base_dir / "subgroup_summary.csv" pd.DataFrame( [ { "method": "standard", "min_hospital_admissions": 500, "empirical_coverage": 0.895, "average_set_size": 0.95, "max_group_coverage_gap": 0.08, "worst_hospital_coverage": 0.84, }, { "method": "standard", "min_hospital_admissions": 500, "empirical_coverage": 0.885, "average_set_size": 0.94, "max_group_coverage_gap": 0.10, "worst_hospital_coverage": 0.83, }, { "method": "missingness_aware", "min_hospital_admissions": 500, "empirical_coverage": 0.898, "average_set_size": 0.96, "max_group_coverage_gap": 0.02, "worst_hospital_coverage": 0.835, }, { "method": "weighted_missingness_aware", "min_hospital_admissions": 500, "empirical_coverage": 0.9, "average_set_size": 0.955, "max_group_coverage_gap": 0.05, "worst_hospital_coverage": 0.845, }, { "method": "shrunk_weighted_missingness_aware", "min_hospital_admissions": 500, "empirical_coverage": 0.897, "average_set_size": 0.952, "max_group_coverage_gap": 0.07, "worst_hospital_coverage": 0.844, }, ] ).to_csv(overall_path, index=False) pd.DataFrame( [ {"method": "standard", "min_hospital_admissions": 500, "group_label": "low", "coverage": 0.82}, {"method": "standard", "min_hospital_admissions": 500, "group_label": "medium", "coverage": 0.92}, {"method": "standard", "min_hospital_admissions": 500, "group_label": "high", "coverage": 0.94}, {"method": "missingness_aware", "min_hospital_admissions": 500, "group_label": "low", "coverage": 0.89}, {"method": "missingness_aware", "min_hospital_admissions": 500, "group_label": "medium", "coverage": 0.90}, {"method": "missingness_aware", "min_hospital_admissions": 500, "group_label": "high", "coverage": 0.91}, {"method": "weighted_missingness_aware", "min_hospital_admissions": 500, "group_label": "low", "coverage": 0.86}, {"method": "weighted_missingness_aware", "min_hospital_admissions": 500, "group_label": "medium", "coverage": 0.91}, {"method": "weighted_missingness_aware", "min_hospital_admissions": 500, "group_label": "high", "coverage": 0.93}, ] ).to_csv(subgroup_path, index=False) return overall_path, subgroup_path def _write_gossis_summary_files_with_opt_in_methods(base_dir: Path) -> tuple[Path, Path]: overall_path, subgroup_path = _write_gossis_summary_files(base_dir) overall = pd.read_csv(overall_path) subgroup = pd.read_csv(subgroup_path) pd.concat( [ overall, pd.DataFrame( [ { "method": "learned_partition", "min_hospital_admissions": 500, "empirical_coverage": 0.901, "average_set_size": 0.958, "max_group_coverage_gap": 0.03, "worst_hospital_coverage": 0.84, }, { "method": "gibbs_general", "min_hospital_admissions": 500, "empirical_coverage": 0.902, "average_set_size": 0.959, "max_group_coverage_gap": 0.04, "worst_hospital_coverage": 0.845, }, ] ), ], ignore_index=True, ).to_csv(overall_path, index=False) pd.concat( [ subgroup, pd.DataFrame( [ {"method": "learned_partition", "min_hospital_admissions": 500, "group_label": "low", "coverage": 0.88}, {"method": "gibbs_general", "min_hospital_admissions": 500, "group_label": "low", "coverage": 0.87}, ] ), ], ignore_index=True, ).to_csv(subgroup_path, index=False) return overall_path, subgroup_path def test_build_physionet_tables_and_plot_frame(tmp_path: Path) -> None: _write_physionet_dropout_run(tmp_path / "none", "", 0.0) _write_physionet_dropout_run(tmp_path / "rd03", "__random_drop", 0.02) _write_physionet_dropout_run(tmp_path / "rd05", "__random_drop", 0.04) summary_path = _write_dropout_summary(tmp_path / "dropout_grid_summary.csv") model_summary_path = _write_model_dropout_summary(tmp_path / "model_dropout_grid_summary.csv") table = build_physionet_main_table(tmp_path / "none", tmp_path / "rd03", tmp_path / "rd05") appendix = build_physionet_appendix_table(tmp_path / "none", tmp_path / "rd03", tmp_path / "rd05") stress = build_physionet_gap_plot_frame(summary_path) model_auroc = build_physionet_model_auroc_frame(model_summary_path) assert len(table) == 18 assert list(table.columns) == [ "setting", "experiment", "condition", "method", "empirical_coverage", "average_set_size", "absolute_coverage_gap", ] assert set(table["method"]) == {"standard", "missingness_aware", "weighted_missingness_aware"} assert len(appendix) == 24 assert "shrunk_weighted_missingness_aware" in set(appendix["method"]) assert set(stress["method"]) == {"standard", "missingness_aware"} assert set(stress["condition"]) == {"none", "drop=0.3", "drop=0.5"} assert list(stress.columns) == ["condition", "mask_rate", "method", "empirical_coverage", "target_coverage", "absolute_coverage_gap", "average_set_size"] assert (stress["absolute_coverage_gap"] >= 0).all() assert set(model_auroc["model_type"]) == {"xgboost", "sklearn_gbdt", "logistic_regression"} assert list(model_auroc.columns) == ["condition", "mask_rate", "model_type", "auroc", "auprc"] def test_build_gossis_validation_frames(tmp_path: Path) -> None: overall_path, subgroup_path = _write_gossis_summary_files(tmp_path) table = build_gossis_validation_table(overall_path, min_hospital_admissions=500) subgroup = build_gossis_validation_subgroup_frame(subgroup_path, min_hospital_admissions=500) assert list(table["method"]) == [ "standard", "missingness_aware", "weighted_missingness_aware", "shrunk_weighted_missingness_aware", ] assert table.loc[table["method"] == "standard", "empirical_coverage"].item() == 0.89 assert set(subgroup["method"]) == {"standard", "missingness_aware", "weighted_missingness_aware"} assert set(subgroup["group_label"]) == {"low", "medium", "high"} def test_build_gossis_validation_frames_ignore_opt_in_methods(tmp_path: Path) -> None: overall_path, subgroup_path = _write_gossis_summary_files_with_opt_in_methods(tmp_path) table = build_gossis_validation_table(overall_path, min_hospital_admissions=500) subgroup = build_gossis_validation_subgroup_frame(subgroup_path, min_hospital_admissions=500) assert "learned_partition" not in set(table["method"]) assert "gibbs_general" not in set(table["method"]) assert "learned_partition" not in set(subgroup["method"]) assert "gibbs_general" not in set(subgroup["method"]) def test_external_baseline_dependency_is_documented_as_optional() -> None: pyproject_text = Path("pyproject.toml").read_text(encoding="utf-8") readme_text = Path("README.md").read_text(encoding="utf-8") assert "[project.optional-dependencies]" in pyproject_text assert "conditionalconformal==0.0.5" in pyproject_text assert "external-baselines" in pyproject_text assert "conditionalconformal==0.0.5" in readme_text def test_main_writes_requested_paper_artifacts(tmp_path: Path) -> None: _write_physionet_dropout_run(tmp_path / "none", "", 0.0) _write_physionet_dropout_run(tmp_path / "rd03", "__random_drop", 0.02) _write_physionet_dropout_run(tmp_path / "rd05", "__random_drop", 0.04) summary_path = _write_dropout_summary(tmp_path / "dropout_grid_summary.csv") model_summary_path = _write_model_dropout_summary(tmp_path / "model_dropout_grid_summary.csv") overall_path, subgroup_path = _write_gossis_summary_files(tmp_path / "gossis") output_dir = tmp_path / "paper" main( [ "--physionet-none-dir", str(tmp_path / "none"), "--physionet-rd03-dir", str(tmp_path / "rd03"), "--physionet-rd05-dir", str(tmp_path / "rd05"), "--physionet-dropout-summary-path", str(summary_path), "--physionet-model-dropout-summary-path", str(model_summary_path), "--gossis-overall-path", str(overall_path), "--gossis-subgroup-path", str(subgroup_path), "--output-dir", str(output_dir), ] ) expected_files = { "figure1_method_schematic.png", "figure1_method_schematic.pdf", "figure2_physionet_stress_test.png", "figure2_physionet_stress_test.pdf", "figureA2_physionet_model_auroc_dropout.png", "figureA2_physionet_model_auroc_dropout.pdf", "figure3_gossis_subgroup_coverage.png", "figure3_gossis_subgroup_coverage.pdf", "table1_physionet_main_results.csv", "table1_physionet_main_results.png", "table1_physionet_main_results.tex", "tableA1_physionet_full_results.csv", "tableA1_physionet_full_results.png", "tableA1_physionet_full_results.tex", "table2_gossis_validation_summary.csv", "table2_gossis_validation_summary.png", "table2_gossis_validation_summary.tex", "artifact_manifest.json", } assert expected_files <= {path.name for path in output_dir.iterdir()}