Spaces:

fudan-renjun
/

Machine-learning-2-8

Sleeping

App Files Files Community

fudan-renjun committed on Apr 6

Commit

ee7a685

verified ·

1 Parent(s): a21dd2d

Update app.py

Browse files

Files changed (1) hide show

app.py +365 -154

app.py CHANGED Viewed

@@ -235,17 +235,194 @@ def bootstrap_auc_test(y_true, proba_a, proba_b, classes, n_bootstrap=2000, seed
         return 1.0, auc_a, auc_b, -1, 1  # Not enough valid bootstraps
     diffs = np.array(diffs)
-    # Two-sided p-value: proportion of bootstrap diffs that cross zero
-    # Under H0: diff=0, we center the diffs
     centered = diffs - np.mean(diffs)
     p_value = np.mean(np.abs(centered) >= np.abs(observed_diff))
-    p_value = max(p_value, 1.0 / n_bootstrap)  # Floor
     ci_low = np.percentile(diffs, 2.5)
     ci_high = np.percentile(diffs, 97.5)
     return p_value, auc_a, auc_b, ci_low, ci_high
 # ============================================================================
 # Model configs (multi-class compatible)
 # ============================================================================
@@ -326,10 +503,8 @@ def run_pipeline(
             log(f"  📋 CSV: Col1=Label, Col2+=Features (no ID column)")
         fnames = X.columns.tolist()
-        # Parse user selection: "3 类" -> 3, "2 类（二分类）" -> 2
         user_n = int(str(n_classes_select).split(" ")[0])
-        # Validate against actual data
         detected_classes = sorted(y.unique())
         detected_classes = [int(c) if hasattr(c, 'item') else c for c in detected_classes]
         detected_n = len(detected_classes)
@@ -342,7 +517,6 @@ def run_pipeline(
         n_classes = user_n
         log(f"  ✅ {n_classes} 分类 — 数据验证通过")
-        # Remap to 0,1,...,n-1
         label_map = {c: i for i, c in enumerate(classes)}
         label_map_inv = {i: c for c, i in label_map.items()}
         y_mapped = y.map(label_map)
@@ -439,7 +613,7 @@ def run_pipeline(
             plot_multiclass_roc(r['all_yt'], r['all_yproba'], class_indices,
                 f'ROC — {mn} ({task_type}, Macro AUC={r["mean_auc"]:.3f})', f'roc_{mn}', rf)
-        # Combined ROC (macro per model)  【原有代码，原封不动】
         plt.figure(figsize=(10, 8))
         for i, mn in enumerate(mnames):
             r = amr[mn]
@@ -515,16 +689,13 @@ def run_pipeline(
         # ====================================================================
         # ★★★ 新增 Part-1：训练集全模型 ROC / PR 曲线
-        #     新文件名前缀 train_roc_* / train_pr_*，与原有文件名零冲突
         # ====================================================================
         progress(0.57, desc="📈 [新增] 训练集ROC/PR曲线...")
         log(f"\n  📈 [新增] 各模型训练集（in-sample）ROC / PR 曲线...")
-        # 两个内部辅助函数，仅用于叠加绘图数据准备
         def _macro_roc_arrays(yt, yp, nc, cls_idx):
             y_b = label_binarize(yt, classes=cls_idx)
-            if nc == 2:
-                y_b = np.hstack([1 - y_b, y_b])
             all_fpr = np.linspace(0, 1, 300)
             mean_tpr = np.zeros_like(all_fpr)
             for c in range(nc):
@@ -535,8 +706,7 @@ def run_pipeline(
         def _macro_pr_arrays(yt, yp, nc, cls_idx):
             y_b = label_binarize(yt, classes=cls_idx)
-            if nc == 2:
-                y_b = np.hstack([1 - y_b, y_b])
             all_rec = np.linspace(0, 1, 300)
             mean_prec = np.zeros_like(all_rec)
             for c in range(nc):
@@ -545,165 +715,129 @@ def run_pipeline(
             mean_prec /= nc
             return all_rec, mean_prec
-        _tr_roc = {}   # mn -> (fpr, tpr, auc) 供汇总图使用
-        _tr_pr  = {}   # mn -> (rec, prec)      供汇总图使用
         for mn in mnames:
             yp_tr = tms[mn].predict_proba(X.values)
-            # 每个模型独立图：各类别曲线 + macro（复用已有绘图函数，仅前缀不同）
-            plot_multiclass_roc(
-                y_mapped.values, yp_tr, class_indices,
-                f'Train ROC — {mn} ({task_type})',
-                f'train_roc_{mn}', rf
-            )
-            plot_multiclass_pr(
-                y_mapped.values, yp_tr, class_indices,
-                f'Train PR — {mn} ({task_type})',
-                f'train_pr_{mn}', rf
-            )
             fpr_t, tpr_t, auc_t = _macro_roc_arrays(y_mapped.values, yp_tr, n_classes, class_indices)
             rec_t, prec_t = _macro_pr_arrays(y_mapped.values, yp_tr, n_classes, class_indices)
             _tr_roc[mn] = (fpr_t, tpr_t, auc_t)
             _tr_pr[mn]  = (rec_t, prec_t)
-        # 汇总：训练集全模型 ROC（train_roc_all）
         plt.figure(figsize=(10, 8))
         for i, mn in enumerate(mnames):
             fpr_t, tpr_t, auc_t = _tr_roc[mn]
-            plt.plot(fpr_t, tpr_t, color=COLORS[i % 8], lw=2.5,
-                     label=f'{mn} (Train Macro AUC={auc_t:.3f})')
-        plt.plot([0, 1], [0, 1], '--', color='#ccc', lw=1)
-        plt.xlim([-0.02, 1.02]); plt.ylim([-0.02, 1.02])
-        plt.xlabel('False Positive Rate', fontsize=13)
-        plt.ylabel('True Positive Rate', fontsize=13)
-        plt.title(f'Train ROC — All Models ({task_type})', fontsize=14, fontweight='bold')
-        plt.legend(loc='lower right', fontsize=10)
-        plt.grid(True, alpha=0.15); plt.tight_layout()
-        plt.savefig(os.path.join(rf, 'train_roc_all.pdf'), format='pdf', bbox_inches='tight', dpi=300)
-        plt.savefig(os.path.join(rf, 'train_roc_all.png'), format='png', bbox_inches='tight', dpi=150)
         plt.close()
-        # 汇总：训练集全模型 PR（train_pr_all）
         plt.figure(figsize=(10, 8))
         for i, mn in enumerate(mnames):
             rec_t, prec_t = _tr_pr[mn]
-            plt.plot(rec_t, prec_t, color=COLORS[i % 8], lw=2.5,
-                     label=f'{mn} (Mean AP={prec_t.mean():.3f})')
-        plt.xlim([-0.02, 1.02]); plt.ylim([-0.02, 1.02])
-        plt.xlabel('Recall', fontsize=13)
-        plt.ylabel('Precision', fontsize=13)
-        plt.title(f'Train PR — All Models ({task_type})', fontsize=14, fontweight='bold')
-        plt.legend(loc='lower left', fontsize=10)
-        plt.grid(True, alpha=0.15); plt.tight_layout()
-        plt.savefig(os.path.join(rf, 'train_pr_all.pdf'), format='pdf', bbox_inches='tight', dpi=300)
-        plt.savefig(os.path.join(rf, 'train_pr_all.png'), format='png', bbox_inches='tight', dpi=150)
         plt.close()
-        log(f"     ✅ 训练集 ROC/PR 已生成：各模型独立图 + 汇总图（train_roc_all / train_pr_all）")
         # ====================================================================
-        # ★★★ 新增 Part-2：最终模型（best_mn）训练集 vs 内部 CV 对比
-        #     新文件：roc_train_vs_cv_* / pr_train_vs_cv_* / cm_train_*
-        #             train_vs_cv_*.xlsx
-        #     原有文件：roc_* / pr_* / cm_* / model_evaluation.xlsx 均不变
         # ====================================================================
         progress(0.59, desc="📊 [新增] 最终模型Train vs CV对比...")
         log(f"\n  📊 [新增] 最终模型 [{best_mn}] 训练集 vs 内部验证集（CV holdout）...")
-        # 训练集预测（用全量 fit 后的模型 tms[best_mn]）
         yp_best_tr = tms[best_mn].predict_proba(X.values)
         yd_best_tr = tms[best_mn].predict(X.values)
-        met_tr = compute_multiclass_metrics(
-            y_mapped.values, yd_best_tr, yp_best_tr, class_indices)
-        # 内部 CV holdout（直接取 amr 中已累积的结果，不重新运算）
         yp_best_cv = amr[best_mn]['all_yproba']
         yd_best_cv = amr[best_mn]['all_yp']
         yt_best_cv = amr[best_mn]['all_yt']
-        met_cv = compute_multiclass_metrics(
-            yt_best_cv, yd_best_cv, yp_best_cv, class_indices)
         log(f"     Train  → AUC={met_tr['Macro_AUC']:.4f}  Acc={met_tr['Accuracy']:.4f}"
-            f"  F1={met_tr['Macro_F1']:.4f}  Kappa={met_tr['Kappa']:.4f}")
         log(f"     CV-Val → AUC={met_cv['Macro_AUC']:.4f}  Acc={met_cv['Accuracy']:.4f}"
-            f"  F1={met_cv['Macro_F1']:.4f}  Kappa={met_cv['Kappa']:.4f}")
-        # 对比 ROC（roc_train_vs_cv_{best_mn}）
-        fpr_tb, tpr_tb, auc_tb = _macro_roc_arrays(
-            y_mapped.values, yp_best_tr, n_classes, class_indices)
-        fpr_cb, tpr_cb, auc_cb = _macro_roc_arrays(
-            yt_best_cv, yp_best_cv, n_classes, class_indices)
         fig, ax = plt.subplots(figsize=(10, 8))
-        ax.plot(fpr_tb, tpr_tb, color='#e41a1c', lw=2.5,
-                label=f'Train set (Macro AUC={auc_tb:.3f})')
-        ax.plot(fpr_cb, tpr_cb, color='#377eb8', lw=2.5, linestyle='--',
-                label=f'Internal CV (Macro AUC={auc_cb:.3f})')
-        ax.plot([0, 1], [0, 1], '--', color='#ccc', lw=1)
-        ax.set_xlim([-0.02, 1.02]); ax.set_ylim([-0.02, 1.02])
-        ax.set_xlabel('False Positive Rate', fontsize=13)
-        ax.set_ylabel('True Positive Rate', fontsize=13)
-        ax.set_title(f'ROC — {best_mn}: Train vs Internal CV ({task_type})',
-                     fontsize=14, fontweight='bold')
-        ax.legend(loc='lower right', fontsize=11)
-        ax.grid(True, alpha=0.15); plt.tight_layout()
-        plt.savefig(os.path.join(rf, f'roc_train_vs_cv_{best_mn}.pdf'),
-                    format='pdf', bbox_inches='tight', dpi=300)
-        plt.savefig(os.path.join(rf, f'roc_train_vs_cv_{best_mn}.png'),
-                    format='png', bbox_inches='tight', dpi=150)
         plt.close()
-        # 对比 PR（pr_train_vs_cv_{best_mn}）
-        rec_tb, prec_tb = _macro_pr_arrays(
-            y_mapped.values, yp_best_tr, n_classes, class_indices)
-        rec_cb, prec_cb = _macro_pr_arrays(
-            yt_best_cv, yp_best_cv, n_classes, class_indices)
         fig, ax = plt.subplots(figsize=(10, 8))
-        ax.plot(rec_tb, prec_tb, color='#e41a1c', lw=2.5,
-                label=f'Train set (Mean AP={prec_tb.mean():.3f})')
-        ax.plot(rec_cb, prec_cb, color='#377eb8', lw=2.5, linestyle='--',
-                label=f'Internal CV (Mean AP={prec_cb.mean():.3f})')
-        ax.set_xlim([-0.02, 1.02]); ax.set_ylim([-0.02, 1.02])
-        ax.set_xlabel('Recall', fontsize=13)
-        ax.set_ylabel('Precision', fontsize=13)
-        ax.set_title(f'PR — {best_mn}: Train vs Internal CV ({task_type})',
-                     fontsize=14, fontweight='bold')
-        ax.legend(loc='lower left', fontsize=11)
-        ax.grid(True, alpha=0.15); plt.tight_layout()
-        plt.savefig(os.path.join(rf, f'pr_train_vs_cv_{best_mn}.pdf'),
-                    format='pdf', bbox_inches='tight', dpi=300)
-        plt.savefig(os.path.join(rf, f'pr_train_vs_cv_{best_mn}.png'),
-                    format='png', bbox_inches='tight', dpi=150)
         plt.close()
-        # 训练集混淆矩阵（cm_train_{best_mn}）
-        plot_confusion_matrix(
-            y_mapped.values, yd_best_tr, class_indices,
-            f'Train CM — {best_mn} (Acc={met_tr["Accuracy"]:.3f})',
-            f'cm_train_{best_mn}', rf
-        )
-        # 指标汇总 Excel（train_vs_cv_{best_mn}.xlsx，独立新文件）
-        with pd.ExcelWriter(
-                os.path.join(rf, f'train_vs_cv_{best_mn}.xlsx'),
-                engine='openpyxl') as w:
             pd.DataFrame([
-                {'Split': 'Train',       'Model': best_mn,
-                 'Macro_AUC':  met_tr['Macro_AUC'],  'Accuracy':    met_tr['Accuracy'],
-                 'Macro_F1':   met_tr['Macro_F1'],   'Weighted_F1': met_tr['Weighted_F1'],
-                 'Kappa':      met_tr['Kappa']},
-                {'Split': 'Internal_CV', 'Model': best_mn,
-                 'Macro_AUC':  met_cv['Macro_AUC'],  'Accuracy':    met_cv['Accuracy'],
-                 'Macro_F1':   met_cv['Macro_F1'],   'Weighted_F1': met_cv['Weighted_F1'],
-                 'Kappa':      met_cv['Kappa']},
-            ]).to_excel(w, sheet_name='Summary', index=False)
-            pd.DataFrame(met_tr['report']).T.to_excel(w, sheet_name='Train_PerClass', index=True)
-            pd.DataFrame(met_cv['report']).T.to_excel(w, sheet_name='CV_PerClass',    index=True)
             amr[best_mn]['fold_df'].to_excel(w, sheet_name='CV_FoldDetail', index=False)
-        log(f"     ✅ Train vs CV 对比图及汇总数据已保存 → train_vs_cv_{best_mn}.xlsx")
         # ====================================================================
         # ★★★ 新增结束
         # ====================================================================
@@ -712,7 +846,6 @@ def run_pipeline(
         progress(0.62, desc="🔥 SHAP分析...")
         log(f"\n  🔥 SHAP特征分析 (保留模型中 Top 3)...")
         shap_imp = {}
-        # SHAP for top 3 retained models
         models_for_shap = sorted(retained, key=lambda x: amr[x]['mean_auc'], reverse=True)[:3]
         for si, mn in enumerate(models_for_shap):
@@ -729,12 +862,10 @@ def run_pipeline(
                     exp = shap.KernelExplainer(lambda x, m=mo: m.predict_proba(x), bg)
                     sv = exp.shap_values(Xs)
-                # Handle SHAP output: could be list of arrays (one per class) or 3D array
                 if isinstance(sv, list):
-                    # Average absolute SHAP across all classes
                     sv_abs = np.mean([np.abs(s) for s in sv], axis=0)
                 elif sv.ndim == 3:
-                    sv_abs = np.mean(np.abs(sv), axis=2)  # (samples, features)
                 else:
                     sv_abs = np.abs(sv)
@@ -745,7 +876,6 @@ def run_pipeline(
                 idf = pd.DataFrame({'Feature': fnames, 'Importance': fi}).sort_values('Importance', ascending=False)
                 shap_imp[mn] = idf
-                # Bar plot (works for any number of classes)
                 plt.figure(figsize=(10, max(6, TOPN * 0.3)))
                 top_df = idf.head(TOPN).iloc[::-1]
                 plt.barh(top_df['Feature'], top_df['Importance'], color='#2563eb', alpha=0.8)
@@ -759,7 +889,7 @@ def run_pipeline(
             except Exception as e:
                 log(f"     ⚠ {mn} SHAP失败: {e}")
-        # ── Feature Ablation (for best model only) ──  【原有代码，原封不动】
         progress(0.72, desc="🧪 特征消融...")
         log(f"\n  🧪 特征消融 (仅最佳模型 {best_mn})...")
         ablation_data = None
@@ -767,11 +897,14 @@ def run_pipeline(
             imp_df = shap_imp[best_mn]
             top_feats = imp_df.head(TOPN)['Feature'].tolist()
             fcs = []; aucs_a = []
-            scoring = 'roc_auc_ovr' if n_classes > 2 else 'roc_auc'
             for nf in range(1, len(top_feats) + 1):
                 Xsub = X[top_feats[:nf]]
                 fold_aucs = []
                 for tri, tei in skf.split(Xsub, y_mapped):
                     mf = deepcopy(mcfg[best_mn]['model'])
                     bp2 = bpd.get(best_mn, {})
@@ -786,31 +919,57 @@ def run_pipeline(
                             a = roc_auc_score(yte_f, yproba_f, multi_class='ovr', average='macro')
                     except: a = 0.0
                     fold_aucs.append(a)
                 fcs.append(nf); aucs_a.append(np.mean(fold_aucs))
-            # Find optimal: first N where AUC >= 95% of full AUC
             full_auc = amr[best_mn]['mean_auc']
             opt_n = len(top_feats)
             for i, a in enumerate(aucs_a):
                 if a >= full_auc * 0.95:
                     opt_n = i + 1; break
-            ablation_data = {'fcs': fcs, 'aucs': aucs_a, 'feats': top_feats, 'opt_n': opt_n, 'opt_feats': top_feats[:opt_n]}
             log(f"     ✅ 最优特征数: {opt_n} (AUC={aucs_a[opt_n-1]:.4f} vs Full={full_auc:.4f})")
-            # Plot
             plt.figure(figsize=(10, 7))
             plt.plot(fcs, aucs_a, 'o-', color='#2563eb', lw=2, ms=5)
-            plt.scatter([opt_n], [aucs_a[opt_n-1]], s=200, marker='*', color='#ef4444', edgecolors='black', lw=2, zorder=5)
-            plt.axhline(y=full_auc, color='gray', ls='--', lw=1, alpha=0.5, label=f'Full AUC={full_auc:.3f}')
             plt.xlabel('Number of Features', fontsize=13); plt.ylabel('Macro AUC', fontsize=13)
-            plt.title(f'Feature Ablation — {best_mn} (★ Optimal={opt_n})', fontsize=14, fontweight='bold')
             plt.legend(fontsize=11); plt.grid(True, alpha=0.15); plt.tight_layout()
             plt.savefig(os.path.join(rf, 'ablation.pdf'), format='pdf', bbox_inches='tight')
             plt.savefig(os.path.join(rf, 'ablation.png'), format='png', bbox_inches='tight', dpi=150)
             plt.close()
-        # ── External Validation ──  【原有代码，原封不动】
         val_files_list = [vf for vf in [val_file1, val_file2, val_file3] if vf is not None]
         final_feats = ablation_data['opt_feats'] if ablation_data else fnames
@@ -826,7 +985,6 @@ def run_pipeline(
                 vcol2_is_id = (vcol2.dtype == 'object') or (vcol2.nunique() / len(vcol2) > 0.5)
                 Xe = ed.iloc[:, 2:] if vcol2_is_id else ed.iloc[:, 1:]
-                # Map validation labels using same mapping
                 ye = ye_raw.map(label_map)
                 if ye.isna().any():
                     log(f"     ⚠ 验证集 {vi} 含有训练集中不存在的标签，已跳过")
@@ -842,8 +1000,16 @@ def run_pipeline(
                 yep = fm.predict_proba(Xes.values); yed = fm.predict(Xes.values)
                 ye_np = ye.values
                 metrics = compute_multiclass_metrics(ye_np, yed, yep, class_indices)
-                log(f"     ✅ AUC={metrics['Macro_AUC']:.4f}  Acc={metrics['Accuracy']:.4f}  F1={metrics['Macro_F1']:.4f}  Kappa={metrics['Kappa']:.4f}")
                 sfx = f'_ext{vi}' if len(val_files_list) > 1 else '_ext'
                 tag = f'Validation {vi}' if len(val_files_list) > 1 else 'External'
@@ -853,42 +1019,87 @@ def run_pipeline(
                 plot_confusion_matrix(ye_np, yed, class_indices, f'CM — {tag} ({best_mn})', f'cm{sfx}', rf)
                 with pd.ExcelWriter(os.path.join(rf, f'validation{sfx}.xlsx'), engine='openpyxl') as w:
                     pd.DataFrame([{'Model': best_mn, 'N_Features': len(final_feats),
                         'Macro_AUC': metrics['Macro_AUC'], 'Accuracy': metrics['Accuracy'],
                         'Macro_F1': metrics['Macro_F1'], 'Weighted_F1': metrics['Weighted_F1'],
                         'Kappa': metrics['Kappa']}]).to_excel(w, sheet_name='Metrics', index=False)
                     rpt = pd.DataFrame(metrics['report']).T
                     rpt.to_excel(w, sheet_name='Per_Class', index=True)
                     pd.DataFrame({'Feature': final_feats}).to_excel(w, sheet_name='Features', index=False)
-        # ── Save Results ──  【原有代码，原封不动】
         progress(0.92, desc="💾 保存结果...")
         log(f"\n  💾 保存结果...")
         with pd.ExcelWriter(os.path.join(rf, 'model_evaluation.xlsx'), engine='openpyxl') as w:
             for mn, r in amr.items():
                 r['fold_df'].to_excel(w, sheet_name=mn, index=False)
-            # Summary with retained status
             sd = [{'Model': mn, 'Macro_AUC': r['mean_auc'], 'Accuracy': r['mean_acc'],
                    'Macro_F1': r['mean_f1'], 'Retained': 'Yes' if mn in retained else 'No',
                    'Best': 'Best' if mn == best_mn else ''}
                   for mn, r in amr.items()]
-            pd.DataFrame(sd).sort_values('Macro_AUC', ascending=False).to_excel(w, sheet_name='Summary', index=False)
-            # Bootstrap test results
             if len(bootstrap_df) > 0:
                 bootstrap_df.to_excel(w, sheet_name='Bootstrap_Test', index=False)
-            # Per-class report for best model
             best_report = classification_report(amr[best_mn]['all_yt'], amr[best_mn]['all_yp'],
                                                 labels=class_indices, output_dict=True, zero_division=0)
             pd.DataFrame(best_report).T.to_excel(w, sheet_name=f'{best_mn}_PerClass', index=True)
         if ablation_data:
             with pd.ExcelWriter(os.path.join(rf, 'feature_ablation.xlsx'), engine='openpyxl') as w:
-                pd.DataFrame({'N': ablation_data['fcs'], 'AUC': ablation_data['aucs']}).to_excel(w, sheet_name='Ablation', index=False)
                 for mn, idf in shap_imp.items():
                     idf.to_excel(w, sheet_name=f'{mn}_Imp', index=False)
-        # Save params (English for SCI)  【原有代码，原封不动】
         with open(os.path.join(rf, 'best_params.txt'), 'w', encoding='utf-8') as f:
             f.write(f"Task: {task_type} Classification ({n_classes} classes)\n")
             f.write(f"Classes: {classes}\n")
@@ -914,7 +1125,7 @@ def run_pipeline(
             if ablation_data:
                 f.write(f"\nOptimal Features ({ablation_data['opt_n']}): {', '.join(ablation_data['opt_feats'])}\n")
-        # Save model  【原有代码，原封不动】
         pickle.dump({
             'model_name': best_mn, 'model': tms[best_mn], 'best_params': bpd[best_mn],
             'classes': classes, 'n_classes': n_classes, 'label_map': label_map,

         return 1.0, auc_a, auc_b, -1, 1  # Not enough valid bootstraps
     diffs = np.array(diffs)
     centered = diffs - np.mean(diffs)
     p_value = np.mean(np.abs(centered) >= np.abs(observed_diff))
+    p_value = max(p_value, 1.0 / n_bootstrap)
     ci_low = np.percentile(diffs, 2.5)
     ci_high = np.percentile(diffs, 97.5)
     return p_value, auc_a, auc_b, ci_low, ci_high
+# ============================================================================
+# ★ 新增全局工具函数：Bootstrap 95%CI + 敏感性/特异性等指标计算
+# ============================================================================
def _bootstrap_ci(y_true, y_pred, y_proba, classes, metric_fn, n_bootstrap=1000, seed=42):
    """Estimate a metric and its percentile-bootstrap 95% confidence interval.

    Args:
        y_true: Ground-truth labels, one per sample (any array-like).
        y_pred: Predicted labels, aligned with ``y_true``.
        y_proba: Predicted scores, aligned with ``y_true`` along the first axis.
        classes: Collection of class labels; only its length is used, as the
            number of distinct labels a resample must contain to be kept.
        metric_fn: Callable ``metric_fn(y_true, y_pred, y_proba) -> float``.
        n_bootstrap: Number of resamples to draw.
        seed: Seed for the ``numpy.random.RandomState`` used for resampling.

    Returns:
        ``(point_estimate, ci_low, ci_high)``. The point estimate is computed
        on the full data. The bounds are the 2.5th / 97.5th percentiles of the
        bootstrap replicates, or ``nan`` when fewer than 50 replicates could
        be evaluated.
    """
    # Coerce once so that integer-array indexing below is positional for
    # lists and pandas objects alike (a Series would index by label).
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    y_proba = np.asarray(y_proba)

    rng = np.random.RandomState(seed)
    n = len(y_true)
    n_cls = len(classes)
    point = metric_fn(y_true, y_pred, y_proba)

    boots = []
    for _ in range(n_bootstrap):
        idx = rng.choice(n, n, replace=True)
        yt_b = y_true[idx]
        # A resample that lost a class cannot be scored comparably; drop it.
        if len(np.unique(yt_b)) < n_cls:
            continue
        try:
            boots.append(metric_fn(yt_b, y_pred[idx], y_proba[idx]))
        except Exception:
            # Best effort: a replicate the metric cannot score is skipped.
            # KeyboardInterrupt / SystemExit are deliberately not caught.
            continue

    # Too few replicates give unstable percentiles; report no interval.
    if len(boots) < 50:
        return point, np.nan, np.nan
    return point, float(np.percentile(boots, 2.5)), float(np.percentile(boots, 97.5))
def compute_extended_metrics_with_ci(y_true, y_pred, y_proba, classes,
                                     n_bootstrap=1000, seed=42):
    """Compute multi-class diagnostic metrics with bootstrap 95% CIs.

    Classes are matched by position: class ``i`` is every sample whose value
    in ``y_true`` / ``y_pred`` equals ``i``, scored by ``y_proba[:, i]``. The
    labels are therefore expected to be the indices ``0 .. len(classes) - 1``.

    Args:
        y_true: Ground-truth label indices.
        y_pred: Predicted label indices.
        y_proba: Score matrix with one column per class.
        classes: Class identifiers; used for the ``Class`` column of the
            per-class table and for the number of classes.
        n_bootstrap: Number of bootstrap resamples per metric.
        seed: Seed forwarded to ``_bootstrap_ci``.

    Returns:
        ``(result, per_class_df)`` where ``result`` maps each of
        ``Accuracy, Macro_AUC, Sensitivity, Specificity, PPV, NPV, Macro_F1,
        Weighted_F1, Kappa`` to its point estimate, with ``<name>_CI_low`` and
        ``<name>_CI_high`` alongside, plus a ``report`` entry holding the
        ``classification_report`` dict. Sensitivity, Specificity, PPV and NPV
        are unweighted means of the one-vs-rest values. ``per_class_df`` has
        one row per class with TP/FP/FN/TN, the four rates, F1 and AUC.
    """
    n_cls = len(classes)
    y_true = np.array(y_true)
    y_pred = np.array(y_pred)
    y_proba = np.array(y_proba)

    def _ovr_counts(yt, yp, i):
        """Return (tn, fp, fn, tp) for class index ``i`` versus the rest."""
        is_true = yt == i
        is_pred = yp == i
        tp = int(np.count_nonzero(is_true & is_pred))
        fp = int(np.count_nonzero(~is_true & is_pred))
        fn = int(np.count_nonzero(is_true & ~is_pred))
        tn = int(np.count_nonzero(~is_true & ~is_pred))
        return tn, fp, fn, tp

    def _ratio(num, den):
        """Return ``num / den``, or 0.0 when the denominator is empty."""
        return num / den if den > 0 else 0.0

    def _macro_rate(select):
        """Build a metric averaging a one-vs-rest rate over all classes.

        ``select(tn, fp, fn, tp)`` returns the (numerator, denominator) pair.
        """
        def _metric(yt, yp, ypr):
            rates = []
            for i in range(n_cls):
                num, den = select(*_ovr_counts(yt, yp, i))
                rates.append(_ratio(num, den))
            return float(np.mean(rates))
        return _metric

    # -- Per-class (one-vs-rest) table --
    per_rows = []
    for i, cls in enumerate(classes):
        tn, fp, fn, tp = _ovr_counts(y_true, y_pred, i)
        sens = _ratio(tp, tp + fn)
        spec = _ratio(tn, tn + fp)
        ppv = _ratio(tp, tp + fp)
        npv = _ratio(tn, tn + fn)
        f1_c = _ratio(2 * ppv * sens, ppv + sens)
        try:
            auc_c = roc_auc_score((y_true == i).astype(int), y_proba[:, i])
        except Exception:
            # e.g. the class is absent from y_true, so AUC is undefined.
            auc_c = np.nan
        per_rows.append({
            'Class': cls, 'TP': tp, 'FP': fp, 'FN': fn, 'TN': tn,
            'Sensitivity': sens, 'Specificity': spec,
            'PPV': ppv, 'NPV': npv, 'F1': f1_c, 'AUC': auc_c
        })
    per_class_df = pd.DataFrame(per_rows)

    # -- Aggregate metrics; all share the (y_true, y_pred, y_proba) signature --
    def _macro_f1(yt, yp, ypr):
        return float(f1_score(yt, yp, average='macro', zero_division=0))

    def _acc(yt, yp, ypr):
        return float(accuracy_score(yt, yp))

    def _kappa(yt, yp, ypr):
        return float(cohen_kappa_score(yt, yp))

    def _macro_auc(yt, yp, ypr):
        try:
            if n_cls == 2:
                return float(roc_auc_score(yt, ypr[:, 1]))
            return float(roc_auc_score(yt, ypr, multi_class='ovr', average='macro'))
        except Exception:
            # Kept as 0.0 (not NaN) so downstream formatting and percentile
            # computations behave exactly as before.
            return 0.0

    def _wf1(yt, yp, ypr):
        return float(f1_score(yt, yp, average='weighted', zero_division=0))

    metric_fns = {
        'Accuracy':    _acc,
        'Macro_AUC':   _macro_auc,
        'Sensitivity': _macro_rate(lambda tn, fp, fn, tp: (tp, tp + fn)),
        'Specificity': _macro_rate(lambda tn, fp, fn, tp: (tn, tn + fp)),
        'PPV':         _macro_rate(lambda tn, fp, fn, tp: (tp, tp + fp)),
        'NPV':         _macro_rate(lambda tn, fp, fn, tp: (tn, tn + fn)),
        'Macro_F1':    _macro_f1,
        'Weighted_F1': _wf1,
        'Kappa':       _kappa,
    }

    result = {}
    for name, metric in metric_fns.items():
        pt, lo, hi = _bootstrap_ci(y_true, y_pred, y_proba, classes, metric,
                                   n_bootstrap=n_bootstrap, seed=seed)
        result[name] = pt
        result[f'{name}_CI_low'] = lo
        result[f'{name}_CI_high'] = hi

    # Keep the 'report' entry that existing callers read.
    result['report'] = classification_report(
        y_true, y_pred, labels=list(range(n_cls)), output_dict=True, zero_division=0)
    return result, per_class_df
def _fmt(val, lo, hi):
    """Render an estimate as ``'0.xxxx (0.xxxx–0.xxxx)'`` for display.

    When the lower bound is NaN only the point estimate is rendered.
    """
    has_interval = not np.isnan(lo)
    text = f"{val:.4f}"
    if has_interval:
        text += f" ({lo:.4f}–{hi:.4f})"
    return text
def build_metrics_summary_df(ext_metrics, model_name, split_name):
    """Flatten an extended-metrics dict into a one-row DataFrame.

    The row starts with ``Model`` and ``Split``; each metric then contributes
    its point estimate and a ``<metric>_95CI`` display string. Metrics or
    bounds missing from ``ext_metrics`` are treated as NaN.
    """
    metric_names = ('Accuracy', 'Macro_AUC', 'Sensitivity', 'Specificity',
                    'PPV', 'NPV', 'Macro_F1', 'Weighted_F1', 'Kappa')
    record = {'Model': model_name, 'Split': split_name}
    for metric in metric_names:
        point = ext_metrics.get(metric, np.nan)
        lower = ext_metrics.get(f'{metric}_CI_low', np.nan)
        upper = ext_metrics.get(f'{metric}_CI_high', np.nan)
        record[metric] = point
        record[f'{metric}_95CI'] = _fmt(point, lower, upper)
    return pd.DataFrame([record])
 # ============================================================================
 # Model configs (multi-class compatible)
 # ============================================================================
             log(f"  📋 CSV: Col1=Label, Col2+=Features (no ID column)")
         fnames = X.columns.tolist()
         user_n = int(str(n_classes_select).split(" ")[0])
         detected_classes = sorted(y.unique())
         detected_classes = [int(c) if hasattr(c, 'item') else c for c in detected_classes]
         detected_n = len(detected_classes)
         n_classes = user_n
         log(f"  ✅ {n_classes} 分类 — 数据验证通过")
         label_map = {c: i for i, c in enumerate(classes)}
         label_map_inv = {i: c for c, i in label_map.items()}
         y_mapped = y.map(label_map)
             plot_multiclass_roc(r['all_yt'], r['all_yproba'], class_indices,
                 f'ROC — {mn} ({task_type}, Macro AUC={r["mean_auc"]:.3f})', f'roc_{mn}', rf)
+        # Combined ROC (macro per model)
         plt.figure(figsize=(10, 8))
         for i, mn in enumerate(mnames):
             r = amr[mn]
         # ====================================================================
         # ★★★ 新增 Part-1：训练集全模型 ROC / PR 曲线
         # ====================================================================
         progress(0.57, desc="📈 [新增] 训练集ROC/PR曲线...")
         log(f"\n  📈 [新增] 各模型训练集（in-sample）ROC / PR 曲线...")
         def _macro_roc_arrays(yt, yp, nc, cls_idx):
             y_b = label_binarize(yt, classes=cls_idx)
+            if nc == 2: y_b = np.hstack([1 - y_b, y_b])
             all_fpr = np.linspace(0, 1, 300)
             mean_tpr = np.zeros_like(all_fpr)
             for c in range(nc):
         def _macro_pr_arrays(yt, yp, nc, cls_idx):
             y_b = label_binarize(yt, classes=cls_idx)
+            if nc == 2: y_b = np.hstack([1 - y_b, y_b])
             all_rec = np.linspace(0, 1, 300)
             mean_prec = np.zeros_like(all_rec)
             for c in range(nc):
             mean_prec /= nc
             return all_rec, mean_prec
+        _tr_roc = {}; _tr_pr = {}
         for mn in mnames:
             yp_tr = tms[mn].predict_proba(X.values)
+            plot_multiclass_roc(y_mapped.values, yp_tr, class_indices,
+                f'Train ROC — {mn} ({task_type})', f'train_roc_{mn}', rf)
+            plot_multiclass_pr(y_mapped.values, yp_tr, class_indices,
+                f'Train PR — {mn} ({task_type})', f'train_pr_{mn}', rf)
             fpr_t, tpr_t, auc_t = _macro_roc_arrays(y_mapped.values, yp_tr, n_classes, class_indices)
             rec_t, prec_t = _macro_pr_arrays(y_mapped.values, yp_tr, n_classes, class_indices)
             _tr_roc[mn] = (fpr_t, tpr_t, auc_t)
             _tr_pr[mn]  = (rec_t, prec_t)
         plt.figure(figsize=(10, 8))
         for i, mn in enumerate(mnames):
             fpr_t, tpr_t, auc_t = _tr_roc[mn]
+            plt.plot(fpr_t, tpr_t, color=COLORS[i%8], lw=2.5, label=f'{mn} (Train Macro AUC={auc_t:.3f})')
+        plt.plot([0,1],[0,1],'--',color='#ccc',lw=1)
+        plt.xlim([-0.02,1.02]); plt.ylim([-0.02,1.02])
+        plt.xlabel('False Positive Rate',fontsize=13); plt.ylabel('True Positive Rate',fontsize=13)
+        plt.title(f'Train ROC — All Models ({task_type})',fontsize=14,fontweight='bold')
+        plt.legend(loc='lower right',fontsize=10); plt.grid(True,alpha=0.15); plt.tight_layout()
+        plt.savefig(os.path.join(rf,'train_roc_all.pdf'),format='pdf',bbox_inches='tight',dpi=300)
+        plt.savefig(os.path.join(rf,'train_roc_all.png'),format='png',bbox_inches='tight',dpi=150)
         plt.close()
         plt.figure(figsize=(10, 8))
         for i, mn in enumerate(mnames):
             rec_t, prec_t = _tr_pr[mn]
+            plt.plot(rec_t, prec_t, color=COLORS[i%8], lw=2.5, label=f'{mn} (Mean AP={prec_t.mean():.3f})')
+        plt.xlim([-0.02,1.02]); plt.ylim([-0.02,1.02])
+        plt.xlabel('Recall',fontsize=13); plt.ylabel('Precision',fontsize=13)
+        plt.title(f'Train PR — All Models ({task_type})',fontsize=14,fontweight='bold')
+        plt.legend(loc='lower left',fontsize=10); plt.grid(True,alpha=0.15); plt.tight_layout()
+        plt.savefig(os.path.join(rf,'train_pr_all.pdf'),format='pdf',bbox_inches='tight',dpi=300)
+        plt.savefig(os.path.join(rf,'train_pr_all.png'),format='png',bbox_inches='tight',dpi=150)
         plt.close()
+        log(f"     ✅ 训练集 ROC/PR 已生成（各模型独立图 + 汇总图）")
         # ====================================================================
+        # ★★★ 新增 Part-2：最终模型 Train vs Internal CV 对比（含扩展指标+CI）
         # ====================================================================
         progress(0.59, desc="📊 [新增] 最终模型Train vs CV对比...")
         log(f"\n  📊 [新增] 最终模型 [{best_mn}] 训练集 vs 内部验证集（CV holdout）...")
         yp_best_tr = tms[best_mn].predict_proba(X.values)
         yd_best_tr = tms[best_mn].predict(X.values)
         yp_best_cv = amr[best_mn]['all_yproba']
         yd_best_cv = amr[best_mn]['all_yp']
         yt_best_cv = amr[best_mn]['all_yt']
+        # 扩展指标（含95%CI）
+        met_tr_ext, pc_tr = compute_extended_metrics_with_ci(
+            y_mapped.values, yd_best_tr, yp_best_tr, class_indices, n_bootstrap=1000, seed=RS)
+        met_cv_ext, pc_cv = compute_extended_metrics_with_ci(
+            yt_best_cv, yd_best_cv, yp_best_cv, class_indices, n_bootstrap=1000, seed=RS)
+        # 保留原有兼容字段
+        met_tr = {k: met_tr_ext[k] for k in ['Accuracy','Macro_AUC','Macro_F1','Weighted_F1','Kappa','report']}
+        met_cv = {k: met_cv_ext[k] for k in ['Accuracy','Macro_AUC','Macro_F1','Weighted_F1','Kappa','report']}
         log(f"     Train  → AUC={met_tr['Macro_AUC']:.4f}  Acc={met_tr['Accuracy']:.4f}"
+            f"  Sens={met_tr_ext['Sensitivity']:.4f}  Spec={met_tr_ext['Specificity']:.4f}")
         log(f"     CV-Val → AUC={met_cv['Macro_AUC']:.4f}  Acc={met_cv['Accuracy']:.4f}"
+            f"  Sens={met_cv_ext['Sensitivity']:.4f}  Spec={met_cv_ext['Specificity']:.4f}")
+        # 对比 ROC
+        fpr_tb, tpr_tb, auc_tb = _macro_roc_arrays(y_mapped.values, yp_best_tr, n_classes, class_indices)
+        fpr_cb, tpr_cb, auc_cb = _macro_roc_arrays(yt_best_cv, yp_best_cv, n_classes, class_indices)
         fig, ax = plt.subplots(figsize=(10, 8))
+        ax.plot(fpr_tb, tpr_tb, color='#e41a1c', lw=2.5, label=f'Train set (Macro AUC={auc_tb:.3f})')
+        ax.plot(fpr_cb, tpr_cb, color='#377eb8', lw=2.5, linestyle='--', label=f'Internal CV (Macro AUC={auc_cb:.3f})')
+        ax.plot([0,1],[0,1],'--',color='#ccc',lw=1)
+        ax.set_xlim([-0.02,1.02]); ax.set_ylim([-0.02,1.02])
+        ax.set_xlabel('False Positive Rate',fontsize=13); ax.set_ylabel('True Positive Rate',fontsize=13)
+        ax.set_title(f'ROC — {best_mn}: Train vs Internal CV ({task_type})',fontsize=14,fontweight='bold')
+        ax.legend(loc='lower right',fontsize=11); ax.grid(True,alpha=0.15); plt.tight_layout()
+        plt.savefig(os.path.join(rf,f'roc_train_vs_cv_{best_mn}.pdf'),format='pdf',bbox_inches='tight',dpi=300)
+        plt.savefig(os.path.join(rf,f'roc_train_vs_cv_{best_mn}.png'),format='png',bbox_inches='tight',dpi=150)
         plt.close()
+        # 对比 PR
+        rec_tb, prec_tb = _macro_pr_arrays(y_mapped.values, yp_best_tr, n_classes, class_indices)
+        rec_cb, prec_cb = _macro_pr_arrays(yt_best_cv, yp_best_cv, n_classes, class_indices)
         fig, ax = plt.subplots(figsize=(10, 8))
+        ax.plot(rec_tb, prec_tb, color='#e41a1c', lw=2.5, label=f'Train set (Mean AP={prec_tb.mean():.3f})')
+        ax.plot(rec_cb, prec_cb, color='#377eb8', lw=2.5, linestyle='--', label=f'Internal CV (Mean AP={prec_cb.mean():.3f})')
+        ax.set_xlim([-0.02,1.02]); ax.set_ylim([-0.02,1.02])
+        ax.set_xlabel('Recall',fontsize=13); ax.set_ylabel('Precision',fontsize=13)
+        ax.set_title(f'PR — {best_mn}: Train vs Internal CV ({task_type})',fontsize=14,fontweight='bold')
+        ax.legend(loc='lower left',fontsize=11); ax.grid(True,alpha=0.15); plt.tight_layout()
+        plt.savefig(os.path.join(rf,f'pr_train_vs_cv_{best_mn}.pdf'),format='pdf',bbox_inches='tight',dpi=300)
+        plt.savefig(os.path.join(rf,f'pr_train_vs_cv_{best_mn}.png'),format='png',bbox_inches='tight',dpi=150)
         plt.close()
+        plot_confusion_matrix(y_mapped.values, yd_best_tr, class_indices,
+            f'Train CM — {best_mn} (Acc={met_tr["Accuracy"]:.3f})', f'cm_train_{best_mn}', rf)
+        # Train vs CV Excel（含扩展指标+CI）
+        with pd.ExcelWriter(os.path.join(rf,f'train_vs_cv_{best_mn}.xlsx'),engine='openpyxl') as w:
+            df_tr = build_metrics_summary_df(met_tr_ext, best_mn, 'Train')
+            df_cv = build_metrics_summary_df(met_cv_ext, best_mn, 'Internal_CV')
+            pd.concat([df_tr, df_cv], ignore_index=True).to_excel(w, sheet_name='Summary_with_CI', index=False)
+            # 简洁数字版（兼容旧格式）
             pd.DataFrame([
+                {'Split':'Train','Model':best_mn,
+                 'Macro_AUC':met_tr_ext['Macro_AUC'],'Accuracy':met_tr_ext['Accuracy'],
+                 'Sensitivity':met_tr_ext['Sensitivity'],'Specificity':met_tr_ext['Specificity'],
+                 'PPV':met_tr_ext['PPV'],'NPV':met_tr_ext['NPV'],
+                 'Macro_F1':met_tr_ext['Macro_F1'],'Weighted_F1':met_tr_ext['Weighted_F1'],
+                 'Kappa':met_tr_ext['Kappa']},
+                {'Split':'Internal_CV','Model':best_mn,
+                 'Macro_AUC':met_cv_ext['Macro_AUC'],'Accuracy':met_cv_ext['Accuracy'],
+                 'Sensitivity':met_cv_ext['Sensitivity'],'Specificity':met_cv_ext['Specificity'],
+                 'PPV':met_cv_ext['PPV'],'NPV':met_cv_ext['NPV'],
+                 'Macro_F1':met_cv_ext['Macro_F1'],'Weighted_F1':met_cv_ext['Weighted_F1'],
+                 'Kappa':met_cv_ext['Kappa']},
+            ]).to_excel(w, sheet_name='Summary_numeric', index=False)
+            pc_tr.to_excel(w, sheet_name='Train_PerClass', index=False)
+            pc_cv.to_excel(w, sheet_name='CV_PerClass',    index=False)
             amr[best_mn]['fold_df'].to_excel(w, sheet_name='CV_FoldDetail', index=False)
+        log(f"     ✅ Train vs CV 对比（含95%CI）已保存 → train_vs_cv_{best_mn}.xlsx")
         # ====================================================================
         # ★★★ 新增结束
         # ====================================================================
         progress(0.62, desc="🔥 SHAP分析...")
         log(f"\n  🔥 SHAP特征分析 (保留模型中 Top 3)...")
         shap_imp = {}
         models_for_shap = sorted(retained, key=lambda x: amr[x]['mean_auc'], reverse=True)[:3]
         for si, mn in enumerate(models_for_shap):
                     exp = shap.KernelExplainer(lambda x, m=mo: m.predict_proba(x), bg)
                     sv = exp.shap_values(Xs)
                 if isinstance(sv, list):
                     sv_abs = np.mean([np.abs(s) for s in sv], axis=0)
                 elif sv.ndim == 3:
+                    sv_abs = np.mean(np.abs(sv), axis=2)
                 else:
                     sv_abs = np.abs(sv)
                 idf = pd.DataFrame({'Feature': fnames, 'Importance': fi}).sort_values('Importance', ascending=False)
                 shap_imp[mn] = idf
                 plt.figure(figsize=(10, max(6, TOPN * 0.3)))
                 top_df = idf.head(TOPN).iloc[::-1]
                 plt.barh(top_df['Feature'], top_df['Importance'], color='#2563eb', alpha=0.8)
             except Exception as e:
                 log(f"     ⚠ {mn} SHAP失败: {e}")
+        # ── Feature Ablation ──  【原有代码，原封不动；Excel 中新增 p 值列】
         progress(0.72, desc="🧪 特征消融...")
         log(f"\n  🧪 特征消融 (仅最佳模型 {best_mn})...")
         ablation_data = None
             imp_df = shap_imp[best_mn]
             top_feats = imp_df.head(TOPN)['Feature'].tolist()
             fcs = []; aucs_a = []
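+            # Also collect, for every ablation step, the pooled CV-holdout labels and probabilities (used below for the bootstrap p-value of each step vs. the last, all-top-features step)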
+            all_probas_per_step = []
             for nf in range(1, len(top_feats) + 1):
                 Xsub = X[top_feats[:nf]]
                 fold_aucs = []
+                step_yt_all = []; step_yp_all = []; step_yproba_all = []
                 for tri, tei in skf.split(Xsub, y_mapped):
                     mf = deepcopy(mcfg[best_mn]['model'])
                     bp2 = bpd.get(best_mn, {})
                             a = roc_auc_score(yte_f, yproba_f, multi_class='ovr', average='macro')
                     except: a = 0.0
                     fold_aucs.append(a)
+                    step_yt_all.extend(yte_f.tolist())
+                    step_yp_all.extend(mf.predict(Xsub.iloc[tei].values).tolist())
+                    step_yproba_all.append(yproba_f)
                 fcs.append(nf); aucs_a.append(np.mean(fold_aucs))
+                all_probas_per_step.append({
+                    'yt':     np.array(step_yt_all),
+                    'yproba': np.vstack(step_yproba_all)
+                })
             full_auc = amr[best_mn]['mean_auc']
             opt_n = len(top_feats)
             for i, a in enumerate(aucs_a):
                 if a >= full_auc * 0.95:
                     opt_n = i + 1; break
+            ablation_data = {
+                'fcs': fcs, 'aucs': aucs_a, 'feats': top_feats,
+                'opt_n': opt_n, 'opt_feats': top_feats[:opt_n]
+            }
             log(f"     ✅ 最优特征数: {opt_n} (AUC={aucs_a[opt_n-1]:.4f} vs Full={full_auc:.4f})")
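+            # Bootstrap p-value of each ablation step vs. the reference step (the last step, which uses all of top_feats); steps are NOT compared with their neighbours.
+            # NOTE(review): reuses ref_step['yt'] for both probability arrays, so this assumes skf.split yields identical folds at every step — TODO confirm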
+            ref_step = all_probas_per_step[-1]  # full features
+            ablation_pvals = []
+            for si2, step in enumerate(all_probas_per_step):
+                if si2 == len(all_probas_per_step) - 1:
+                    ablation_pvals.append(np.nan)  # full vs full
+                    continue
+                p_v, _, _, _, _ = bootstrap_auc_test(
+                    ref_step['yt'], ref_step['yproba'], step['yproba'],
+                    class_indices, n_bootstrap=500, seed=RS
+                )
+                ablation_pvals.append(p_v)
+            # Plot（原有不变）
             plt.figure(figsize=(10, 7))
             plt.plot(fcs, aucs_a, 'o-', color='#2563eb', lw=2, ms=5)
+            plt.scatter([opt_n], [aucs_a[opt_n-1]], s=200, marker='*',
+                        color='#ef4444', edgecolors='black', lw=2, zorder=5)
+            plt.axhline(y=full_auc, color='gray', ls='--', lw=1, alpha=0.5,
+                        label=f'Full AUC={full_auc:.3f}')
             plt.xlabel('Number of Features', fontsize=13); plt.ylabel('Macro AUC', fontsize=13)
+            plt.title(f'Feature Ablation — {best_mn} (★ Optimal={opt_n})',
+                      fontsize=14, fontweight='bold')
             plt.legend(fontsize=11); plt.grid(True, alpha=0.15); plt.tight_layout()
             plt.savefig(os.path.join(rf, 'ablation.pdf'), format='pdf', bbox_inches='tight')
             plt.savefig(os.path.join(rf, 'ablation.png'), format='png', bbox_inches='tight', dpi=150)
             plt.close()
+        # ── External Validation ──  【原有代码，原封不动；Excel 新增扩展指标】
         val_files_list = [vf for vf in [val_file1, val_file2, val_file3] if vf is not None]
         final_feats = ablation_data['opt_feats'] if ablation_data else fnames
                 vcol2_is_id = (vcol2.dtype == 'object') or (vcol2.nunique() / len(vcol2) > 0.5)
                 Xe = ed.iloc[:, 2:] if vcol2_is_id else ed.iloc[:, 1:]
                 ye = ye_raw.map(label_map)
                 if ye.isna().any():
                     log(f"     ⚠ 验证集 {vi} 含有训练集中不存在的标签，已跳过")
                 yep = fm.predict_proba(Xes.values); yed = fm.predict(Xes.values)
                 ye_np = ye.values
+                # 原有基础指标
                 metrics = compute_multiclass_metrics(ye_np, yed, yep, class_indices)
+                # 新增扩展指标
+                met_ext_vi, pc_vi = compute_extended_metrics_with_ci(
+                    ye_np, yed, yep, class_indices, n_bootstrap=1000, seed=RS)
+                log(f"     ✅ AUC={metrics['Macro_AUC']:.4f}  Acc={metrics['Accuracy']:.4f}"
+                    f"  Sens={met_ext_vi['Sensitivity']:.4f}  Spec={met_ext_vi['Specificity']:.4f}"
+                    f"  PPV={met_ext_vi['PPV']:.4f}  NPV={met_ext_vi['NPV']:.4f}"
+                    f"  F1={metrics['Macro_F1']:.4f}  Kappa={metrics['Kappa']:.4f}")
                 sfx = f'_ext{vi}' if len(val_files_list) > 1 else '_ext'
                 tag = f'Validation {vi}' if len(val_files_list) > 1 else 'External'
                 plot_confusion_matrix(ye_np, yed, class_indices, f'CM — {tag} ({best_mn})', f'cm{sfx}', rf)
                 with pd.ExcelWriter(os.path.join(rf, f'validation{sfx}.xlsx'), engine='openpyxl') as w:
+                    # 原有 Metrics sheet（保持兼容）
                     pd.DataFrame([{'Model': best_mn, 'N_Features': len(final_feats),
                         'Macro_AUC': metrics['Macro_AUC'], 'Accuracy': metrics['Accuracy'],
                         'Macro_F1': metrics['Macro_F1'], 'Weighted_F1': metrics['Weighted_F1'],
                         'Kappa': metrics['Kappa']}]).to_excel(w, sheet_name='Metrics', index=False)
+                    # 新增：含 Sensitivity/Specificity/PPV/NPV + 95%CI
+                    build_metrics_summary_df(met_ext_vi, best_mn, tag).to_excel(
+                        w, sheet_name='Metrics_with_CI', index=False)
+                    pc_vi.to_excel(w, sheet_name='PerClass_detail', index=False)
                     rpt = pd.DataFrame(metrics['report']).T
                     rpt.to_excel(w, sheet_name='Per_Class', index=True)
                     pd.DataFrame({'Feature': final_feats}).to_excel(w, sheet_name='Features', index=False)
+        # ── Save Results ──  【原有代码，原封不动；新增扩展指标到 model_evaluation.xlsx】
         progress(0.92, desc="💾 保存结果...")
         log(f"\n  💾 保存结果...")
+        # 为所有模型计算 CV holdout 扩展指标（含 CI）
+        log(f"  🔬 [新增] 计算各模型完整诊断指标 + 95%CI（Bootstrap n=1000）...")
+        all_ext_metrics = {}
+        all_per_class   = {}
+        for mn in mnames:
+            r = amr[mn]
+            ext_m, pc_m = compute_extended_metrics_with_ci(
+                r['all_yt'], r['all_yp'], r['all_yproba'],
+                class_indices, n_bootstrap=1000, seed=RS)
+            all_ext_metrics[mn] = ext_m
+            all_per_class[mn]   = pc_m
         with pd.ExcelWriter(os.path.join(rf, 'model_evaluation.xlsx'), engine='openpyxl') as w:
+            # 原有：各模型分折明细
             for mn, r in amr.items():
                 r['fold_df'].to_excel(w, sheet_name=mn, index=False)
+            # 原有：Summary（保持原格式不变）
             sd = [{'Model': mn, 'Macro_AUC': r['mean_auc'], 'Accuracy': r['mean_acc'],
                    'Macro_F1': r['mean_f1'], 'Retained': 'Yes' if mn in retained else 'No',
                    'Best': 'Best' if mn == best_mn else ''}
                   for mn, r in amr.items()]
+            pd.DataFrame(sd).sort_values('Macro_AUC', ascending=False).to_excel(
+                w, sheet_name='Summary', index=False)
+            # 原有：Bootstrap 检验
             if len(bootstrap_df) > 0:
                 bootstrap_df.to_excel(w, sheet_name='Bootstrap_Test', index=False)
+            # 原有：最佳模型 PerClass
             best_report = classification_report(amr[best_mn]['all_yt'], amr[best_mn]['all_yp'],
                                                 labels=class_indices, output_dict=True, zero_division=0)
             pd.DataFrame(best_report).T.to_excel(w, sheet_name=f'{best_mn}_PerClass', index=True)
+            # ★ 新增：所有模型完整指标 + 95%CI（纵向汇总）
+            rows_ci = []
+            for mn in mnames:
+                row = build_metrics_summary_df(all_ext_metrics[mn], mn, 'CV_holdout')
+                rows_ci.append(row)
+            pd.concat(rows_ci, ignore_index=True).to_excel(
+                w, sheet_name='All_Models_Metrics_CI', index=False)
+            # ★ 新增：每个模型逐类详细指标
+            for mn in mnames:
+                sheet = f'{mn}_PerClass_detail'[:31]  # Excel sheet name limit
+                all_per_class[mn].to_excel(w, sheet_name=sheet, index=False)
+        # 特征消融 Excel（原有基础上新增 p 值列）
         if ablation_data:
             with pd.ExcelWriter(os.path.join(rf, 'feature_ablation.xlsx'), engine='openpyxl') as w:
+                # ★ 新增：Ablation sheet 加入 p 值
+                abl_df = pd.DataFrame({
+                    'N':      ablation_data['fcs'],
+                    'AUC':    ablation_data['aucs'],
+                    'P_vs_full (Bootstrap)': ablation_pvals,  # NaN for full
+                })
+                abl_df['Significant (p<0.05)'] = abl_df['P_vs_full (Bootstrap)'].apply(
+                    lambda x: 'Yes' if (not np.isnan(x) and x < 0.05) else ('No' if not np.isnan(x) else 'Ref'))
+                abl_df.to_excel(w, sheet_name='Ablation', index=False)
                 for mn, idf in shap_imp.items():
                     idf.to_excel(w, sheet_name=f'{mn}_Imp', index=False)
+        # Save params 【原有代码，原封不动】
         with open(os.path.join(rf, 'best_params.txt'), 'w', encoding='utf-8') as f:
             f.write(f"Task: {task_type} Classification ({n_classes} classes)\n")
             f.write(f"Classes: {classes}\n")
             if ablation_data:
                 f.write(f"\nOptimal Features ({ablation_data['opt_n']}): {', '.join(ablation_data['opt_feats'])}\n")
+        # Save model 【原有代码，原封不动】
         pickle.dump({
             'model_name': best_mn, 'model': tms[best_mn], 'best_params': bpd[best_mn],
             'classes': classes, 'n_classes': n_classes, 'label_map': label_map,