| from __future__ import annotations |
|
|
| import json |
| from pathlib import Path |
|
|
| import pandas as pd |
|
|
|
|
def _phase2_fixture_row(method: str, alpha: float, **extra: object) -> dict[str, object]:
    """Build one synthetic sweep record for the fixture CSVs.

    The sweep-configuration columns shared by the repeated-summary,
    stability and Jaccard fixtures come first, in a fixed order, followed by
    the table-specific columns in the order they were passed.

    Args:
        method: Value for the ``method`` column.
        alpha: Value for the ``alpha`` column.
        **extra: Table-specific columns appended after the shared ones.

    Returns:
        A dict usable as one record when constructing a ``pd.DataFrame``.
    """
    return {
        "experiment": "hospital_disjoint",
        "method": method,
        "min_hospital_admissions": 500,
        "alpha": alpha,
        "selection_fraction": 0.1,
        "model_type": "xgboost",
        "missingness_grouping_strategy": "coverage_gap_variable",
        "mask_strategy": "none",
        "mask_rate": 0.0,
        "selective_feature_group": "labs",
        "weighted_shrinkage_lambda": 0.5,
        **extra,
    }


def test_run_appendix_phase2_analysis_writes_expected_outputs(tmp_path: Path) -> None:
    """Run the phase-2 appendix analysis on tiny synthetic sweeps and check its outputs.

    Two alpha-sweep directories, one grouping-sweep directory and one
    selection-sweep directory are populated with hand-written CSVs under
    ``tmp_path``. The analysis is then run and the files it reports are read
    back and spot-checked.

    Args:
        tmp_path: Per-test temporary directory that holds all inputs and outputs.
    """
    import math

    from sepsis_mcp.appendix_phase2_analysis import (
        AppendixPhase2Config,
        run_appendix_phase2_analysis,
    )

    alpha005 = tmp_path / "alpha005"
    alpha010 = tmp_path / "alpha010"
    grouping = tmp_path / "grouping"
    selection = tmp_path / "selection"
    # The output directory is only handed to the config; it is not created here.
    output_dir = tmp_path / "phase2"
    for input_dir in (alpha005, alpha010, grouping, selection):
        input_dir.mkdir(parents=True, exist_ok=True)

    # --- Alpha sweep inputs: one repeated summary per alpha directory. ---
    repeated_alpha = pd.DataFrame(
        [
            _phase2_fixture_row(
                "standard",
                0.05,
                run_count=2,
                empirical_coverage_mean=0.95,
                empirical_coverage_std=0.01,
                max_group_coverage_gap_mean=0.04,
                max_group_coverage_gap_std=0.01,
                average_set_size_mean=1.1,
                average_set_size_std=0.1,
                worst_hospital_coverage_mean=0.88,
                worst_hospital_coverage_std=0.02,
                wcr=0.82,
            ),
            _phase2_fixture_row(
                "missingness_aware",
                0.05,
                run_count=2,
                empirical_coverage_mean=0.95,
                empirical_coverage_std=0.01,
                max_group_coverage_gap_mean=0.02,
                max_group_coverage_gap_std=0.01,
                average_set_size_mean=1.12,
                average_set_size_std=0.1,
                worst_hospital_coverage_mean=0.90,
                worst_hospital_coverage_std=0.02,
                wcr=0.90,
            ),
        ]
    )
    repeated_alpha.to_csv(alpha005 / "repeated_summary.csv", index=False)
    # The second sweep reuses the same rows with only the alpha column overwritten.
    repeated_alpha.assign(alpha=0.10).to_csv(alpha010 / "repeated_summary.csv", index=False)

    # --- Grouping sweep input (no *_std columns in this table). ---
    grouping_repeated = pd.DataFrame(
        [
            _phase2_fixture_row(
                "standard",
                0.1,
                run_count=2,
                empirical_coverage_mean=0.90,
                max_group_coverage_gap_mean=0.05,
                average_set_size_mean=0.95,
                worst_hospital_coverage_mean=0.84,
                wcr=0.80,
            ),
            _phase2_fixture_row(
                "missingness_aware",
                0.1,
                run_count=2,
                empirical_coverage_mean=0.90,
                max_group_coverage_gap_mean=0.02,
                average_set_size_mean=0.97,
                worst_hospital_coverage_mean=0.88,
                wcr=0.88,
            ),
        ]
    )
    grouping_repeated.to_csv(grouping / "repeated_summary.csv", index=False)

    # --- Selection sweep inputs: stability, Jaccard, repeated and per-run tables. ---
    selected_variable_stability = pd.DataFrame(
        [
            _phase2_fixture_row(
                "missingness_aware",
                0.1,
                selected_variable="lactate",
                count=2,
                frequency=0.5,
            ),
            _phase2_fixture_row(
                "missingness_aware",
                0.1,
                selected_variable="pao2fio2ratio",
                count=1,
                frequency=0.25,
            ),
        ]
    )
    selected_variable_stability.to_csv(selection / "selected_variable_stability.csv", index=False)

    jaccard_summary = pd.DataFrame(
        [
            _phase2_fixture_row(
                "missingness_aware",
                0.1,
                run_count=4,
                mean_pairwise_jaccard=0.3,
                min_pairwise_jaccard=0.0,
            )
        ]
    )
    jaccard_summary.to_csv(selection / "selected_variable_jaccard_summary.csv", index=False)

    # The selection sweep's repeated summary is the missingness-aware subset
    # of the grouping fixture.
    grouping_repeated[grouping_repeated["method"] == "missingness_aware"].to_csv(
        selection / "repeated_summary.csv",
        index=False,
    )

    # Per-run rows use a different schema from the sweep tables, so they are
    # written out literally.
    overall_summary = pd.DataFrame(
        [
            {
                "run_id": "seed0",
                "method": "missingness_aware",
                "selected_variable": "lactate",
                "model_type": "xgboost",
                "empirical_coverage": 0.90,
                "max_group_coverage_gap": 0.02,
                "average_set_size": 0.97,
                "worst_hospital_coverage": 0.88,
            },
            {
                "run_id": "seed1",
                "method": "missingness_aware",
                "selected_variable": "pao2fio2ratio",
                "model_type": "xgboost",
                "empirical_coverage": 0.89,
                "max_group_coverage_gap": 0.03,
                "average_set_size": 0.96,
                "worst_hospital_coverage": 0.87,
            },
        ]
    )
    overall_summary.to_csv(selection / "overall_summary.csv", index=False)

    paths = run_appendix_phase2_analysis(
        AppendixPhase2Config(
            alpha_sweep_dirs=(alpha005, alpha010),
            grouping_sweep_dir=grouping,
            selection_sweep_dir=selection,
            output_dir=output_dir,
        )
    )

    alpha_summary = pd.read_csv(paths["alpha_sweep_summary"])
    grouping_summary = pd.read_csv(paths["grouping_baselines_summary"])
    stability_summary = pd.read_csv(paths["selection_stability_summary"])
    performance_by_variable = pd.read_csv(paths["selection_performance_by_variable"])
    manifest = json.loads(paths["manifest"].read_text(encoding="utf-8"))

    # These floats have been through at least one CSV round trip, so compare
    # them with a tolerance instead of exact equality.
    observed_alphas = sorted(alpha_summary["alpha"].unique())
    assert len(observed_alphas) == 2
    assert all(
        math.isclose(observed, expected)
        for observed, expected in zip(observed_alphas, (0.05, 0.10))
    )
    assert "source_dir" in alpha_summary.columns
    assert "gap_reduction_vs_standard_mean" in grouping_summary.columns
    assert stability_summary["top_selected_variable"].iloc[0] == "lactate"
    assert math.isclose(stability_summary["top_selected_variable_frequency"].iloc[0], 0.5)
    assert set(performance_by_variable["selected_variable"]) == {"lactate", "pao2fio2ratio"}
    assert Path(manifest["alpha_sweep_summary"]).exists()
|
|