diff --git "a/app.py" "b/app.py"
--- "a/app.py"
+++ "b/app.py"
@@ -3,26 +3,27 @@ ML Multi-Class Classification Pipeline (2-8 classes)
 Eye & ENT Hospital of Fudan University — Laboratory Medicine, Ren Jun
 Gradio 5.12.0 + Python 3.11
 
-Changelog v3 (vs v2):
-  [v3-1] compute_multiclass_metrics now returns full per-class and macro
-         AUC, Accuracy, Sensitivity (Recall), Specificity, Precision (PPV),
-         NPV, F1 for every class, plus macro/weighted averages.
-  [v3-2] Per-fold metrics table extended with all new indicators.
-  [v3-3] Summary sheets (Summary_InternalVal, Train_vs_InternalVal) carry
-         all new macro indicators.
-  [v3-4] Per-class detail sheets written for every model (train + val).
-  [v3-5] External validation Excel extended with all new indicators.
-  [v3-6] best_params.txt log extended with all new indicators.
-  [v3-7] Console log shows key new indicators.
-
-Previous fixes retained:
-  [FIX-1] XGBoost num_class=None bug
-  [FIX-2] Bootstrap p-value centered on 0
-  [FIX-3] SHAP 3D axis detection
-  [FIX-4] Per-model train-set ROC/PR/CM
-  [FIX-5] Best-model Train vs InternalVal overlay plots
-  [FIX-6] Train_vs_InternalVal Excel sheet
-  [FIX-7] Guest account expiry updated
+Changelog v4 (vs v3):
+  [v4-1] DeLong test (bootstrap implementation, multi-class safe) replaces
+         simple Bootstrap for step-1 model comparison.
+  [v4-2] Step-1 selection: best model by AUC + all models NOT significantly
+         different from it (DeLong p >= 0.05) → "delong_retained" group.
+  [v4-3] Step-2 ablation: run SHAP + incremental feature ablation for EVERY
+         model in delong_retained, each using its own SHAP ranking.
+  [v4-4] Within each ablation curve, DeLong test compares every N-feature
+         subset AUC vs the corresponding full-feature AUC of that model
+         (OOF predictions); first N where p >= 0.05 is "opt_n".
+  [v4-5] Final model selection: model with smallest opt_n; tie → highest
+         full-feature OOF AUC.
+  [v4-6] Combined ablation plot shows all retained models' ablation curves
+         with their optimal points annotated.
+  [v4-7] All ablation results, DeLong tables, and model-selection reasoning
+         saved to Excel + txt.
+
+All v3 features retained:
+  Full per-class Sensitivity/Specificity/PPV/NPV/F1/AUC metrics,
+  Train-set ROC/PR/CM, Train vs Val overlay, per-fold tables,
+  per-class Excel sheets, external validation, SHAP bar plots.
 """
 
 import numpy as np
@@ -42,7 +43,7 @@ from sklearn.metrics import (
     roc_auc_score, confusion_matrix, roc_curve,
     auc as auc_score, precision_recall_curve,
     classification_report, accuracy_score, f1_score,
-    cohen_kappa_score, precision_score, recall_score
+    cohen_kappa_score, precision_score, recall_score,
 )
 from sklearn.preprocessing import label_binarize
 import seaborn as sns
@@ -62,7 +63,6 @@ import gradio as gr
 
 warnings.filterwarnings('ignore')
 
-# Publication-quality plot settings
 plt.rcParams['font.family'] = 'serif'
 plt.rcParams['font.serif'] = ['Times New Roman', 'DejaVu Serif', 'serif']
 plt.rcParams['font.sans-serif'] = ['Arial', 'DejaVu Sans']
@@ -76,7 +76,7 @@ plt.rcParams['xtick.labelsize'] = 11
 plt.rcParams['ytick.labelsize'] = 11
 
 # ============================================================================
-# Cache Cleanup
+# Cache cleanup
 # ============================================================================
 CLEANUP_MAX_AGE_MINUTES = 30
 CLEANUP_INTERVAL_SECONDS = 600
@@ -96,127 +96,150 @@ def cleanup_old_temp_files():
 
 def periodic_cleanup():
     while True:
-        time.sleep(CLEANUP_INTERVAL_SECONDS)
-        cleanup_old_temp_files()
+        time.sleep(CLEANUP_INTERVAL_SECONDS); cleanup_old_temp_files()
+
+threading.Thread(target=periodic_cleanup, daemon=True).start()
 
-_ct = threading.Thread(target=periodic_cleanup, daemon=True); _ct.start()
 
 # ============================================================================
-# [v3-1] Extended metrics: Sensitivity, Specificity, PPV, NPV per class
+# [v4-1] DeLong test — bootstrap implementation, multi-class safe
 # ============================================================================
 
-def compute_per_class_sens_spec_ppv_npv(y_true, y_pred, y_proba, classes):
-    """
-    For each class c, treat it as a binary OvR problem:
-      TP = predicted c AND true c
-      FP = predicted c AND true != c
-      FN = predicted != c AND true c
-      TN = predicted != c AND true != c
+def _macro_auc_single(y_true, y_proba, n_classes):
+    """Return the macro OvR AUC (binary: AUC of column 1), or NaN if it cannot be computed."""
+    try:
+        if n_classes == 2:
+            return roc_auc_score(y_true, y_proba[:, 1])
+        return roc_auc_score(y_true, y_proba, multi_class='ovr', average='macro')
+    except Exception:  # not a bare except: KeyboardInterrupt / SystemExit must still propagate
+        return np.nan
 
-    Returns a dict keyed by class index with:
-      Sensitivity (Recall / TPR), Specificity (TNR),
-      PPV (Precision), NPV, F1, AUC (OvR)
 
-    Also returns macro averages of each metric.
+def delong_test(y_true, proba_a, proba_b, classes, n_bootstrap=2000, seed=42):
+    """
+    Bootstrap-based DeLong-equivalent test for comparing two ROC curves.
+    Works for both binary and multi-class (macro AUC, OvR).
+    Falls back to (1.0, auc_a, auc_b, -1.0, 1.0, 0.0) if < 100 resamples are usable.
+
+    Returns
+    -------
+    p_value : float   two-sided p-value under H0: AUC_A == AUC_B
+    auc_a, auc_b : float   AUCs of proba_a / proba_b on the full sample
+    ci_low  : float   95 % CI of (AUC_A - AUC_B)
+    ci_high : float
+    z_score : float   observed_diff / bootstrap_SE
     """
+    rng = np.random.RandomState(seed)
+    y_true = np.asarray(y_true); n = len(y_true)  # ndarray => y_true[idx] is positional
+    nc  = len(classes)
+
+    auc_a = _macro_auc_single(y_true, proba_a, nc)
+    auc_b = _macro_auc_single(y_true, proba_b, nc)
+    observed_diff = auc_a - auc_b
+
+    diffs = []
+    for _ in range(n_bootstrap):
+        idx  = rng.choice(n, n, replace=True)
+        yt_b = y_true[idx]
+        # Skip resamples that lose a class (can't compute multi-class AUC)
+        if len(np.unique(yt_b)) < nc:
+            continue
+        a1 = _macro_auc_single(yt_b, proba_a[idx], nc)
+        a2 = _macro_auc_single(yt_b, proba_b[idx], nc)
+        if np.isnan(a1) or np.isnan(a2):
+            continue
+        diffs.append(a1 - a2)
+
+    if len(diffs) < 100:
+        return 1.0, auc_a, auc_b, -1.0, 1.0, 0.0
+
+    diffs   = np.array(diffs)
+    se      = diffs.std(ddof=1)
+    z_score = observed_diff / se if se > 0 else 0.0
+
+    # Two-sided p under H0 (diff=0): bootstrap diffs scatter around the observed diff, so recentre on 0
+    p_value = float(np.mean(np.abs(diffs - diffs.mean()) >= np.abs(observed_diff)))
+    p_value = max(p_value, 1.0 / n_bootstrap)
+
+    ci_low  = float(np.percentile(diffs, 2.5))
+    ci_high = float(np.percentile(diffs, 97.5))
+
+    return p_value, float(auc_a), float(auc_b), ci_low, ci_high, z_score
+
+
+# ============================================================================
+# Metrics
+# ============================================================================
+
+def compute_per_class_sens_spec_ppv_npv(y_true, y_pred, y_proba, classes):
     n_classes = len(classes)
-    y_true = np.asarray(y_true)
-    y_pred = np.asarray(y_pred)
+    y_true = np.asarray(y_true); y_pred = np.asarray(y_pred)
     y_bin  = label_binarize(y_true, classes=classes)
     if n_classes == 2:
         y_bin = np.hstack([1 - y_bin, y_bin])
 
     per_class = {}
     for i, c in enumerate(classes):
-        yt_b = y_bin[:, i]          # true binary label for class c
+        yt_b = y_bin[:, i]
         yp_b = (y_pred == c).astype(int)
-
         TP = int(np.sum((yt_b == 1) & (yp_b == 1)))
         FP = int(np.sum((yt_b == 0) & (yp_b == 1)))
         FN = int(np.sum((yt_b == 1) & (yp_b == 0)))
         TN = int(np.sum((yt_b == 0) & (yp_b == 0)))
-
-        sens = TP / (TP + FN) if (TP + FN) > 0 else 0.0   # Sensitivity = Recall
-        spec = TN / (TN + FP) if (TN + FP) > 0 else 0.0   # Specificity
-        ppv  = TP / (TP + FP) if (TP + FP) > 0 else 0.0   # PPV = Precision
-        npv  = TN / (TN + FN) if (TN + FN) > 0 else 0.0   # NPV
-        f1   = (2 * ppv * sens / (ppv + sens)) if (ppv + sens) > 0 else 0.0
-
+        sens = TP / (TP + FN) if (TP + FN) > 0 else 0.0
+        spec = TN / (TN + FP) if (TN + FP) > 0 else 0.0
+        ppv  = TP / (TP + FP) if (TP + FP) > 0 else 0.0
+        npv  = TN / (TN + FN) if (TN + FN) > 0 else 0.0
+        f1   = 2 * ppv * sens / (ppv + sens) if (ppv + sens) > 0 else 0.0
         try:
             auc_c = roc_auc_score(yt_b, y_proba[:, i])
         except:
             auc_c = 0.0
-
         per_class[c] = {
             'TP': TP, 'FP': FP, 'FN': FN, 'TN': TN,
             'Sensitivity': sens, 'Specificity': spec,
-            'PPV': ppv, 'NPV': npv, 'F1': f1, 'AUC': auc_c
+            'PPV': ppv, 'NPV': npv, 'F1': f1, 'AUC': auc_c,
         }
 
-    # Macro averages
-    macro = {}
-    for metric in ['Sensitivity', 'Specificity', 'PPV', 'NPV', 'F1', 'AUC']:
-        macro[f'Macro_{metric}'] = np.mean([per_class[c][metric] for c in classes])
-
+    macro = {f'Macro_{m}': np.mean([per_class[c][m] for c in classes])
+             for m in ['Sensitivity','Specificity','PPV','NPV','F1','AUC']}
     return per_class, macro
 
 
 def compute_multiclass_metrics(y_true, y_pred, y_proba, classes):
-    """
-    [v3-1] Extended: returns AUC, Accuracy, Sensitivity, Specificity,
-    Precision (PPV), NPV, F1 — macro and per-class — plus Kappa.
-    """
     n_classes = len(classes)
-    y_true = np.asarray(y_true)
-    y_pred = np.asarray(y_pred)
-
+    y_true = np.asarray(y_true); y_pred = np.asarray(y_pred)
     acc   = accuracy_score(y_true, y_pred)
     kappa = cohen_kappa_score(y_true, y_pred)
-
-    # Macro AUC
     try:
-        if n_classes == 2:
-            macro_auc = roc_auc_score(y_true, y_proba[:, 1])
-        else:
-            macro_auc = roc_auc_score(y_true, y_proba,
-                                       multi_class='ovr', average='macro')
+        # Evaluate the AUC once; _macro_auc_single signals failure with NaN -> report 0.0
+        macro_auc = _macro_auc_single(y_true, y_proba, n_classes)
+        if np.isnan(macro_auc): macro_auc = 0.0
     except:
         macro_auc = 0.0
-
-    # sklearn macro/weighted aggregates
-    f1_macro     = f1_score(y_true, y_pred, average='macro',    zero_division=0, labels=classes)
-    f1_weighted  = f1_score(y_true, y_pred, average='weighted', zero_division=0, labels=classes)
-    prec_macro   = precision_score(y_true, y_pred, average='macro',    zero_division=0, labels=classes)
-    recall_macro = recall_score(   y_true, y_pred, average='macro',    zero_division=0, labels=classes)
-
-    # Per-class Sensitivity / Specificity / PPV / NPV / F1 / AUC
+    f1_macro    = f1_score(y_true, y_pred, average='macro',    zero_division=0, labels=classes)
+    f1_weighted = f1_score(y_true, y_pred, average='weighted', zero_division=0, labels=classes)
     per_class, macro_ext = compute_per_class_sens_spec_ppv_npv(
         y_true, y_pred, y_proba, classes)
-
-    # sklearn classification_report (for precision/recall/f1 by class)
     report = classification_report(
         y_true, y_pred, labels=classes, output_dict=True, zero_division=0)
-
     return {
-        # ── Macro aggregates ──
         'Accuracy':          acc,
         'Macro_AUC':         macro_auc,
-        'Macro_Sensitivity': macro_ext['Macro_Sensitivity'],   # == Macro Recall
+        'Macro_Sensitivity': macro_ext['Macro_Sensitivity'],
         'Macro_Specificity': macro_ext['Macro_Specificity'],
-        'Macro_PPV':         macro_ext['Macro_PPV'],           # == Macro Precision
+        'Macro_PPV':         macro_ext['Macro_PPV'],
         'Macro_NPV':         macro_ext['Macro_NPV'],
         'Macro_F1':          macro_ext['Macro_F1'],
         'Weighted_F1':       f1_weighted,
         'Kappa':             kappa,
-        # ── Per-class detail ──
-        'per_class':         per_class,   # dict keyed by class value
+        'per_class':         per_class,
         'report':            report,
     }
 
 
 def metrics_to_flat_row(metrics, prefix=''):
-    """Flatten a metrics dict into a single-row dict for DataFrame construction."""
-    row = {
+    return {
         f'{prefix}AUC':         metrics['Macro_AUC'],
         f'{prefix}Accuracy':    metrics['Accuracy'],
         f'{prefix}Sensitivity': metrics['Macro_Sensitivity'],
@@ -227,36 +250,22 @@ def metrics_to_flat_row(metrics, prefix=''):
         f'{prefix}Weighted_F1': metrics['Weighted_F1'],
         f'{prefix}Kappa':       metrics['Kappa'],
     }
-    return row
 
 
 def per_class_df(metrics, classes):
-    """Build a tidy per-class DataFrame from compute_multiclass_metrics output."""
     rows = []
     for c in classes:
         pc = metrics['per_class'][c]
-        rows.append({
-            'Class':       c,
-            'AUC':         pc['AUC'],
-            'Sensitivity': pc['Sensitivity'],
-            'Specificity': pc['Specificity'],
-            'PPV':         pc['PPV'],
-            'NPV':         pc['NPV'],
-            'F1':          pc['F1'],
-            'TP':          pc['TP'],
-            'FP':          pc['FP'],
-            'FN':          pc['FN'],
-            'TN':          pc['TN'],
-        })
-    # Append macro row
+        rows.append({'Class': c, **{k: pc[k] for k in
+            ['AUC','Sensitivity','Specificity','PPV','NPV','F1','TP','FP','FN','TN']}})
     rows.append({
-        'Class':       'Macro',
-        'AUC':         metrics['Macro_AUC'],
+        'Class': 'Macro',
+        'AUC':   metrics['Macro_AUC'],
         'Sensitivity': metrics['Macro_Sensitivity'],
         'Specificity': metrics['Macro_Specificity'],
-        'PPV':         metrics['Macro_PPV'],
-        'NPV':         metrics['Macro_NPV'],
-        'F1':          metrics['Macro_F1'],
+        'PPV':   metrics['Macro_PPV'],
+        'NPV':   metrics['Macro_NPV'],
+        'F1':    metrics['Macro_F1'],
         'TP': '', 'FP': '', 'FN': '', 'TN': '',
     })
     return pd.DataFrame(rows)
@@ -265,258 +274,327 @@ def per_class_df(metrics, classes):
 # ============================================================================
 # Plotting helpers
 # ============================================================================
-
-def plot_multiclass_roc(y_true, y_proba, classes, title, filepath_prefix, rf):
-    """Plot ROC curves: one-vs-rest for each class + macro average."""
-    n_classes = len(classes)
-    y_bin = label_binarize(y_true, classes=classes)
-    if n_classes == 2:
-        y_bin = np.hstack([1 - y_bin, y_bin])
-
-    fpr_dict, tpr_dict, auc_dict = {}, {}, {}
-    for i in range(n_classes):
-        fpr_dict[i], tpr_dict[i], _ = roc_curve(y_bin[:, i], y_proba[:, i])
-        auc_dict[i] = auc_score(fpr_dict[i], tpr_dict[i])
-
-    all_fpr = np.unique(np.concatenate([fpr_dict[i] for i in range(n_classes)]))
-    mean_tpr = np.zeros_like(all_fpr)
-    for i in range(n_classes):
-        mean_tpr += np.interp(all_fpr, fpr_dict[i], tpr_dict[i])
-    mean_tpr /= n_classes
-    macro_auc = auc_score(all_fpr, mean_tpr)
-
-    COLORS = ['#e41a1c','#377eb8','#4daf4a','#984ea3',
-              '#ff7f00','#a65628','#f781bf','#999999']
-    plt.figure(figsize=(10, 8))
-    for i in range(n_classes):
-        plt.plot(fpr_dict[i], tpr_dict[i], color=COLORS[i % len(COLORS)], lw=2,
-                 label=f'Class {classes[i]} (AUC={auc_dict[i]:.3f})')
-    plt.plot(all_fpr, mean_tpr, 'k--', lw=2.5,
-             label=f'Macro Avg (AUC={macro_auc:.3f})')
-    plt.plot([0,1],[0,1],'--',color='#cccccc',lw=1)
+PLOT_COLORS = ['#e41a1c','#377eb8','#4daf4a','#984ea3',
+               '#ff7f00','#a65628','#f781bf','#999999']
+MODEL_COLORS = ['#2563eb','#f59e0b','#10b981','#ef4444',
+                '#8b5cf6','#ec4899','#06b6d4','#6b7280']
+
+
+def plot_multiclass_roc(y_true, y_proba, classes, title, prefix, rf):
+    n  = len(classes)
+    yb = label_binarize(y_true, classes=classes)
+    if n == 2: yb = np.hstack([1 - yb, yb])
+    fpr_d, tpr_d, auc_d = {}, {}, {}
+    for i in range(n):
+        fpr_d[i], tpr_d[i], _ = roc_curve(yb[:, i], y_proba[:, i])
+        auc_d[i] = auc_score(fpr_d[i], tpr_d[i])
+    all_fpr = np.unique(np.concatenate([fpr_d[i] for i in range(n)]))
+    mt = np.zeros_like(all_fpr)
+    for i in range(n): mt += np.interp(all_fpr, fpr_d[i], tpr_d[i])
+    mt /= n; ma = auc_score(all_fpr, mt)
+    plt.figure(figsize=(10,8))
+    for i in range(n):
+        plt.plot(fpr_d[i], tpr_d[i], color=PLOT_COLORS[i%8], lw=2,
+                 label=f'Class {classes[i]} (AUC={auc_d[i]:.3f})')
+    plt.plot(all_fpr, mt, 'k--', lw=2.5, label=f'Macro Avg (AUC={ma:.3f})')
+    plt.plot([0,1],[0,1],'--',color='#ccc',lw=1)
     plt.xlim([-0.02,1.02]); plt.ylim([-0.02,1.02])
-    plt.xlabel('False Positive Rate', fontsize=13)
-    plt.ylabel('True Positive Rate', fontsize=13)
-    plt.title(title, fontsize=14, fontweight='bold')
-    plt.legend(loc='lower right', fontsize=9)
-    plt.grid(True, alpha=0.15); plt.tight_layout()
-    plt.savefig(os.path.join(rf, f'{filepath_prefix}.pdf'),
-                format='pdf', bbox_inches='tight', dpi=300)
-    plt.savefig(os.path.join(rf, f'{filepath_prefix}.png'),
-                format='png', bbox_inches='tight', dpi=150)
-    plt.close()
-    return macro_auc, auc_dict
-
-
-def plot_multiclass_pr(y_true, y_proba, classes, title, filepath_prefix, rf):
-    """Plot Precision-Recall curves for each class."""
-    n_classes = len(classes)
-    y_bin = label_binarize(y_true, classes=classes)
-    if n_classes == 2:
-        y_bin = np.hstack([1 - y_bin, y_bin])
-
-    COLORS = ['#e41a1c','#377eb8','#4daf4a','#984ea3',
-              '#ff7f00','#a65628','#f781bf','#999999']
-    plt.figure(figsize=(10, 8))
-    for i in range(n_classes):
-        prec, rec, _ = precision_recall_curve(y_bin[:, i], y_proba[:, i])
-        ap = auc_score(rec, prec)
-        plt.plot(rec, prec, color=COLORS[i % len(COLORS)], lw=2,
-                 label=f'Class {classes[i]} (AP={ap:.3f})')
+    plt.xlabel('False Positive Rate',fontsize=13); plt.ylabel('True Positive Rate',fontsize=13)
+    plt.title(title,fontsize=14,fontweight='bold')
+    plt.legend(loc='lower right',fontsize=9); plt.grid(True,alpha=0.15); plt.tight_layout()
+    for ext,dpi in [('pdf',300),('png',150)]:
+        plt.savefig(os.path.join(rf,f'{prefix}.{ext}'),format=ext,bbox_inches='tight',dpi=dpi)
+    plt.close(); return ma, auc_d
+
+
+def plot_multiclass_pr(y_true, y_proba, classes, title, prefix, rf):
+    n  = len(classes)
+    yb = label_binarize(y_true, classes=classes)
+    if n == 2: yb = np.hstack([1 - yb, yb])
+    plt.figure(figsize=(10,8))
+    for i in range(n):
+        p,r,_ = precision_recall_curve(yb[:,i], y_proba[:,i])
+        plt.plot(r, p, color=PLOT_COLORS[i%8], lw=2,
+                 label=f'Class {classes[i]} (AP={auc_score(r,p):.3f})')
     plt.xlim([-0.02,1.02]); plt.ylim([-0.02,1.02])
-    plt.xlabel('Recall', fontsize=13); plt.ylabel('Precision', fontsize=13)
-    plt.title(title, fontsize=14, fontweight='bold')
-    plt.legend(loc='lower left', fontsize=9)
-    plt.grid(True, alpha=0.15); plt.tight_layout()
-    plt.savefig(os.path.join(rf, f'{filepath_prefix}.pdf'),
-                format='pdf', bbox_inches='tight', dpi=300)
-    plt.savefig(os.path.join(rf, f'{filepath_prefix}.png'),
-                format='png', bbox_inches='tight', dpi=150)
+    plt.xlabel('Recall',fontsize=13); plt.ylabel('Precision',fontsize=13)
+    plt.title(title,fontsize=14,fontweight='bold')
+    plt.legend(loc='lower left',fontsize=9); plt.grid(True,alpha=0.15); plt.tight_layout()
+    for ext,dpi in [('pdf',300),('png',150)]:
+        plt.savefig(os.path.join(rf,f'{prefix}.{ext}'),format=ext,bbox_inches='tight',dpi=dpi)
     plt.close()
 
 
-def plot_confusion_matrix(y_true, y_pred, classes, title, filepath_prefix, rf):
-    """Plot confusion matrix heatmap."""
+def plot_confusion_matrix(y_true, y_pred, classes, title, prefix, rf):
     cm = confusion_matrix(y_true, y_pred, labels=classes)
-    plt.figure(figsize=(max(6, len(classes)*1.2), max(5, len(classes)*1.0)))
+    plt.figure(figsize=(max(6,len(classes)*1.2), max(5,len(classes)*1.0)))
     sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=True,
-                xticklabels=classes, yticklabels=classes,
-                annot_kws={'fontsize': 11})
-    plt.xlabel('Predicted', fontsize=12); plt.ylabel('True', fontsize=12)
-    plt.title(title, fontsize=13, fontweight='bold'); plt.tight_layout()
-    plt.savefig(os.path.join(rf, f'{filepath_prefix}.pdf'),
-                format='pdf', bbox_inches='tight', dpi=300)
-    plt.savefig(os.path.join(rf, f'{filepath_prefix}.png'),
-                format='png', bbox_inches='tight', dpi=150)
-    plt.close()
-    return cm
-
-
-def plot_train_vs_val_roc(y_train, train_proba, y_val, val_proba,
-                           classes, model_name, filepath_prefix, rf):
-    """Overlay train-set ROC and internal-validation (CV OOF) ROC."""
-    n_classes = len(classes)
-
-    def macro_roc(y_true, y_proba):
-        y_bin = label_binarize(y_true, classes=classes)
-        if n_classes == 2:
-            y_bin = np.hstack([1 - y_bin, y_bin])
-        all_fpr = np.linspace(0, 1, 300)
-        mean_tpr = np.zeros_like(all_fpr)
-        for i in range(n_classes):
-            f, t, _ = roc_curve(y_bin[:, i], y_proba[:, i])
-            mean_tpr += np.interp(all_fpr, f, t)
-        mean_tpr /= n_classes; mean_tpr[-1] = 1.0
-        return all_fpr, mean_tpr, auc_score(all_fpr, mean_tpr)
-
-    fpr_tr, tpr_tr, auc_tr = macro_roc(y_train, train_proba)
-    fpr_vl, tpr_vl, auc_vl = macro_roc(y_val,   val_proba)
-
-    plt.figure(figsize=(10, 8))
-    plt.plot(fpr_tr, tpr_tr, color='#e41a1c', lw=2.5,
+                xticklabels=classes, yticklabels=classes, annot_kws={'fontsize':11})
+    plt.xlabel('Predicted',fontsize=12); plt.ylabel('True',fontsize=12)
+    plt.title(title,fontsize=13,fontweight='bold'); plt.tight_layout()
+    for ext,dpi in [('pdf',300),('png',150)]:
+        plt.savefig(os.path.join(rf,f'{prefix}.{ext}'),format=ext,bbox_inches='tight',dpi=dpi)
+    plt.close(); return cm
+
+
+def _macro_roc_curve(y_true, y_proba, classes):
+    """Macro-averaged OvR ROC on a fixed 300-point FPR grid -> (fpr, tpr, auc)."""
+    k = len(classes)
+    onehot = label_binarize(y_true, classes=classes)
+    if k == 2: onehot = np.hstack([1 - onehot, onehot])
+    grid = np.linspace(0, 1, 300); mean_tpr = np.zeros_like(grid)
+    for j in range(k):
+        mean_tpr += np.interp(grid, *roc_curve(onehot[:, j], y_proba[:, j])[:2])
+    mean_tpr /= k; mean_tpr[-1] = 1.0  # pin the end of the averaged curve to TPR = 1
+    return grid, mean_tpr, auc_score(grid, mean_tpr)
+
+
+def plot_train_vs_val_roc(y_train, tp, y_val, vp, classes, mn, prefix, rf):
+    fpr_tr,tpr_tr,auc_tr = _macro_roc_curve(y_train, tp, classes)
+    fpr_vl,tpr_vl,auc_vl = _macro_roc_curve(y_val,   vp, classes)
+    plt.figure(figsize=(10,8))
+    plt.plot(fpr_tr,tpr_tr,color='#e41a1c',lw=2.5,
              label=f'Training set (Macro AUC={auc_tr:.3f})')
-    plt.plot(fpr_vl, tpr_vl, color='#377eb8', lw=2.5, linestyle='--',
+    plt.plot(fpr_vl,tpr_vl,color='#377eb8',lw=2.5,linestyle='--',
              label=f'Internal validation / CV-OOF (Macro AUC={auc_vl:.3f})')
-    plt.plot([0,1],[0,1],'--',color='#cccccc',lw=1)
+    plt.plot([0,1],[0,1],'--',color='#ccc',lw=1)
     plt.xlim([-0.02,1.02]); plt.ylim([-0.02,1.02])
-    plt.xlabel('False Positive Rate', fontsize=13)
-    plt.ylabel('True Positive Rate', fontsize=13)
-    plt.title(f'ROC — Train vs Internal Validation — {model_name}',
-              fontsize=14, fontweight='bold')
-    plt.legend(loc='lower right', fontsize=11)
-    plt.grid(True, alpha=0.15); plt.tight_layout()
-    plt.savefig(os.path.join(rf, f'{filepath_prefix}.pdf'),
-                format='pdf', bbox_inches='tight', dpi=300)
-    plt.savefig(os.path.join(rf, f'{filepath_prefix}.png'),
-                format='png', bbox_inches='tight', dpi=150)
-    plt.close()
-    return auc_tr, auc_vl
-
-
-def plot_train_vs_val_pr(y_train, train_proba, y_val, val_proba,
-                          classes, model_name, filepath_prefix, rf):
-    """Overlay train-set PR and internal-validation PR."""
-    n_classes = len(classes)
-
-    def macro_pr(y_true, y_proba):
-        y_bin = label_binarize(y_true, classes=classes)
-        if n_classes == 2:
-            y_bin = np.hstack([1 - y_bin, y_bin])
-        all_rec = np.linspace(0, 1, 300)
-        mean_prec = np.zeros_like(all_rec)
-        for i in range(n_classes):
-            prec, rec, _ = precision_recall_curve(y_bin[:, i], y_proba[:, i])
-            mean_prec += np.interp(all_rec, rec[::-1], prec[::-1])
-        mean_prec /= n_classes
-        return all_rec, mean_prec, auc_score(all_rec, mean_prec)
-
-    rec_tr, prec_tr, ap_tr = macro_pr(y_train, train_proba)
-    rec_vl, prec_vl, ap_vl = macro_pr(y_val,   val_proba)
-
-    plt.figure(figsize=(10, 8))
-    plt.plot(rec_tr, prec_tr, color='#e41a1c', lw=2.5,
-             label=f'Training set (Macro AP={ap_tr:.3f})')
-    plt.plot(rec_vl, prec_vl, color='#377eb8', lw=2.5, linestyle='--',
+    plt.xlabel('False Positive Rate',fontsize=13); plt.ylabel('True Positive Rate',fontsize=13)
+    plt.title(f'ROC — Train vs Internal Validation — {mn}',fontsize=14,fontweight='bold')
+    plt.legend(loc='lower right',fontsize=11); plt.grid(True,alpha=0.15); plt.tight_layout()
+    for ext,dpi in [('pdf',300),('png',150)]:
+        plt.savefig(os.path.join(rf,f'{prefix}.{ext}'),format=ext,bbox_inches='tight',dpi=dpi)
+    plt.close(); return auc_tr, auc_vl
+
+
+def plot_train_vs_val_pr(y_train, tp, y_val, vp, classes, mn, prefix, rf):
+    n = len(classes)
+    def macro_pr(yt, yp):
+        yb = label_binarize(yt, classes=classes)
+        if n==2: yb = np.hstack([1-yb, yb])
+        ar = np.linspace(0,1,300); mp = np.zeros_like(ar)
+        for i in range(n):
+            p,r,_ = precision_recall_curve(yb[:,i], yp[:,i])
+            mp += np.interp(ar, r[::-1], p[::-1])
+        mp /= n; return ar, mp, auc_score(ar, mp)
+    ar_tr,mp_tr,ap_tr = macro_pr(y_train, tp)
+    ar_vl,mp_vl,ap_vl = macro_pr(y_val,   vp)
+    plt.figure(figsize=(10,8))
+    plt.plot(ar_tr,mp_tr,color='#e41a1c',lw=2.5,label=f'Training set (Macro AP={ap_tr:.3f})')
+    plt.plot(ar_vl,mp_vl,color='#377eb8',lw=2.5,linestyle='--',
              label=f'Internal validation / CV-OOF (Macro AP={ap_vl:.3f})')
     plt.xlim([-0.02,1.02]); plt.ylim([-0.02,1.02])
-    plt.xlabel('Recall', fontsize=13); plt.ylabel('Precision', fontsize=13)
-    plt.title(f'PR — Train vs Internal Validation — {model_name}',
-              fontsize=14, fontweight='bold')
-    plt.legend(loc='lower left', fontsize=11)
-    plt.grid(True, alpha=0.15); plt.tight_layout()
-    plt.savefig(os.path.join(rf, f'{filepath_prefix}.pdf'),
-                format='pdf', bbox_inches='tight', dpi=300)
-    plt.savefig(os.path.join(rf, f'{filepath_prefix}.png'),
-                format='png', bbox_inches='tight', dpi=150)
-    plt.close()
-    return ap_tr, ap_vl
+    plt.xlabel('Recall',fontsize=13); plt.ylabel('Precision',fontsize=13)
+    plt.title(f'PR — Train vs Internal Validation — {mn}',fontsize=14,fontweight='bold')
+    plt.legend(loc='lower left',fontsize=11); plt.grid(True,alpha=0.15); plt.tight_layout()
+    for ext,dpi in [('pdf',300),('png',150)]:
+        plt.savefig(os.path.join(rf,f'{prefix}.{ext}'),format=ext,bbox_inches='tight',dpi=dpi)
+    plt.close(); return ap_tr, ap_vl
 
 
 # ============================================================================
-# Bootstrap AUC test  [FIX-2 retained]
+# [v4-3/4] Per-model SHAP + feature ablation with DeLong vs full-feature
 # ============================================================================
 
-def bootstrap_auc_test(y_true, proba_a, proba_b, classes,
-                        n_bootstrap=2000, seed=42):
-    rng = np.random.RandomState(seed)
-    n = len(y_true)
-    n_classes = len(classes)
+def run_shap_for_model(mn, mo, X, fnames, n_classes, SHAPSZ, RS):
+    """Compute mean |SHAP| per feature for one fitted model; returns a DataFrame sorted descending."""
+    Xshap = X.values
+    ns    = min(SHAPSZ, Xshap.shape[0])
+    np.random.seed(RS)
+    sidx  = np.random.choice(Xshap.shape[0], ns, replace=False)
+    Xs    = Xshap[sidx]
+    if mn in ['RF','XGB','DT','AdaBoost']:
+        exp = shap.TreeExplainer(mo); sv = exp.shap_values(Xs)
+    else:
+        bg  = Xs[np.random.choice(ns, min(50,ns), replace=False)]
+        exp = shap.KernelExplainer(lambda x, m=mo: m.predict_proba(x), bg)
+        sv  = exp.shap_values(Xs)
+    # Reduce to (samples, features); for 3-D output the feature axis is the one matching Xs.shape[1]
+    if isinstance(sv, list):
+        sv_abs = np.mean([np.abs(s) for s in sv], axis=0)
+    elif sv.ndim == 3:
+        sv_abs = np.mean(np.abs(sv), axis=(2 if sv.shape[1] == Xs.shape[1] else 1))
+    else:
+        sv_abs = np.abs(sv)
+    fi = sv_abs.mean(axis=0)
+    if len(fi) > len(fnames): fi = fi[:len(fnames)]
+    elif len(fi) < len(fnames): fi = np.pad(fi,(0,len(fnames)-len(fi)))
+    return (pd.DataFrame({'Feature': fnames, 'Importance': fi})
+            .sort_values('Importance', ascending=False)
+            .reset_index(drop=True))
+
+
+def run_ablation_for_model(mn, mcfg, bpd, X, y_mapped, class_indices,
+                            n_classes, top_feats, skf, full_yt, full_yproba,
+                            ALPHA=0.05, N_BOOT=2000):
+    """
+    Incremental feature ablation for one model using its SHAP ranking.
 
-    def calc_macro_auc(yt, pa, pb):
-        try:
-            if n_classes == 2:
-                a1 = roc_auc_score(yt, pa[:, 1])
-                a2 = roc_auc_score(yt, pb[:, 1])
-            else:
-                a1 = roc_auc_score(yt, pa, multi_class='ovr', average='macro')
-                a2 = roc_auc_score(yt, pb, multi_class='ovr', average='macro')
-            return a1, a2
-        except:
-            return 0.0, 0.0
+    For each subset size N (1 … len(top_feats)):
+      - CV OOF predictions on top N features
+      - Collect OOF proba vectors (same fold splits → directly comparable with full)
+      - DeLong test: subset OOF AUC vs full-feature OOF AUC
 
-    auc_a, auc_b = calc_macro_auc(y_true, proba_a, proba_b)
-    observed_diff = auc_a - auc_b
-    diffs = []
-    for _ in range(n_bootstrap):
-        idx = rng.choice(n, n, replace=True)
-        yt_b = y_true[idx]; pa_b = proba_a[idx]; pb_b = proba_b[idx]
-        if len(np.unique(yt_b)) < n_classes:
-            continue
-        a1, a2 = calc_macro_auc(yt_b, pa_b, pb_b)
-        diffs.append(a1 - a2)
+    opt_n = first N where DeLong p >= ALPHA  (no significant difference from full)
 
-    if len(diffs) < 100:
-        return 1.0, auc_a, auc_b, -1, 1
+    Returns dict with:
+      fcs, aucs, p_values, z_scores, ci_lows, ci_highs,
+      opt_n, opt_feats, opt_auc, delong_rows
+    """
+    cf   = mcfg[mn]
+    bp   = bpd.get(mn, {}) if isinstance(bpd.get(mn,{}), dict) else {}
+    full_auc = _macro_auc_single(full_yt, full_yproba, n_classes)
+
+    fcs=[]; aucs=[]; pvals=[]; zscores=[]; ci_los=[]; ci_his=[]
+    delong_rows=[]
+
+    for nf in range(1, len(top_feats)+1):
+        Xsub = X[top_feats[:nf]]
+        # Collect OOF probas in fold order (same skf splits → aligned with full_yt)
+        sub_yt=[]; sub_yp=[]
+        for tri,tei in skf.split(Xsub, y_mapped):
+            mf = deepcopy(cf['model'])
+            if bp: mf.set_params(**bp)
+            mf.fit(Xsub.iloc[tri].values, y_mapped.iloc[tri])
+            sub_yp.append(mf.predict_proba(Xsub.iloc[tei].values))
+            sub_yt.extend(y_mapped.iloc[tei].tolist())
+
+        sub_yt    = np.array(sub_yt)
+        sub_yproba = np.vstack(sub_yp)
+
+        sub_auc = _macro_auc_single(sub_yt, sub_yproba, n_classes)
+        if np.isnan(sub_auc): sub_auc = 0.0
+
+        # DeLong: subset vs full (both evaluated on the same OOF indices)
+        p, a_sub, a_full, ci_lo, ci_hi, z = delong_test(
+            full_yt, sub_yproba, full_yproba, class_indices,
+            n_bootstrap=N_BOOT, seed=42)
+
+        fcs.append(nf); aucs.append(sub_auc)
+        pvals.append(p); zscores.append(z)
+        ci_los.append(ci_lo); ci_his.append(ci_hi)
+        delong_rows.append({
+            'Model': mn, 'N_Features': nf,
+            'Subset_AUC': sub_auc, 'Full_AUC': full_auc,
+            'AUC_Diff': sub_auc - full_auc,
+            'CI_95_Low': ci_lo, 'CI_95_High': ci_hi,
+            'Z_score': z, 'P_value': p,
+            'Significant': 'No' if p >= ALPHA else 'Yes',
+        })
 
-    diffs = np.array(diffs)
-    # [FIX-2] H0: diff = 0, two-sided
-    p_value = np.mean(np.abs(diffs) >= np.abs(observed_diff))
-    p_value = max(p_value, 1.0 / n_bootstrap)
-    ci_low  = np.percentile(diffs, 2.5)
-    ci_high = np.percentile(diffs, 97.5)
-    return p_value, auc_a, auc_b, ci_low, ci_high
+    # opt_n = first N where p >= ALPHA (subset not significantly different from full)
+    opt_n = len(top_feats)          # fallback: use all
+    for i, p in enumerate(pvals):
+        if p >= ALPHA:
+            opt_n = i + 1; break
+
+    return {
+        'fcs': fcs, 'aucs': aucs, 'pvals': pvals,
+        'zscores': zscores, 'ci_los': ci_los, 'ci_his': ci_his,
+        'opt_n': opt_n, 'opt_feats': top_feats[:opt_n],
+        'opt_auc': aucs[opt_n-1], 'full_auc': full_auc,
+        'delong_rows': delong_rows,
+    }
+
+
+def plot_combined_ablation(ablation_results, rf):
+    """
+    [v4-6] One figure with all retained models' ablation curves.
+    Optimal point of each model is marked with a star.
+    """
+    plt.figure(figsize=(12, 8))
+    for i, (mn, res) in enumerate(ablation_results.items()):
+        c  = MODEL_COLORS[i % 8]
+        on = res['opt_n']
+        plt.plot(res['fcs'], res['aucs'], 'o-', color=c, lw=2, ms=4,
+                 label=f'{mn} (opt={on}, AUC={res["opt_auc"]:.3f})')
+        plt.scatter([on], [res['opt_auc']], s=200, marker='*',
+                    color=c, edgecolors='black', lw=1.5, zorder=6)
+        # horizontal full-AUC reference (same color, dotted)
+        plt.axhline(y=res['full_auc'], color=c, ls=':', lw=1, alpha=0.5)
+
+    plt.xlabel('Number of Features', fontsize=13)
+    plt.ylabel('Macro AUC (CV-OOF)', fontsize=13)
+    plt.title('Feature Ablation — All Retained Models (★ = DeLong-optimal)',
+              fontsize=14, fontweight='bold')
+    plt.legend(loc='lower right', fontsize=9)
+    plt.grid(True, alpha=0.15); plt.tight_layout()
+    for ext,dpi in [('pdf',300),('png',150)]:
+        plt.savefig(os.path.join(rf,f'ablation_combined.{ext}'),
+                    format=ext, bbox_inches='tight', dpi=dpi)
+    plt.close()
+
+
+def plot_single_ablation(mn, res, rf):
+    """Per-model ablation plot with p-value annotation."""
+    fcs   = res['fcs']; aucs = res['aucs']
+    pvals = res['pvals']; on = res['opt_n']
+    full  = res['full_auc']
+
+    fig, ax1 = plt.subplots(figsize=(12, 7))
+    ax2 = ax1.twinx()
+
+    ax1.plot(fcs, aucs, 'o-', color='#2563eb', lw=2, ms=5, label='Macro AUC')
+    ax1.scatter([on], [aucs[on-1]], s=250, marker='*',
+                color='#ef4444', edgecolors='black', lw=2, zorder=6,
+                label=f'Optimal N={on}')
+    ax1.axhline(y=full, color='gray', ls='--', lw=1.2, alpha=0.6,
+                label=f'Full-feature AUC={full:.3f}')
+    ax1.set_xlabel('Number of Features', fontsize=13)
+    ax1.set_ylabel('Macro AUC (CV-OOF)', fontsize=13, color='#2563eb')
+    ax1.tick_params(axis='y', labelcolor='#2563eb')
+    ax1.set_ylim([max(0, min(aucs) - 0.05), min(1.02, max(aucs) + 0.05)])
+
+    ax2.bar(fcs, [-np.log10(max(p, 1e-6)) for p in pvals],
+            color='#f59e0b', alpha=0.35, width=0.6, label='-log10(p)')
+    ax2.axhline(y=-np.log10(0.05), color='#ef4444', ls='-.', lw=1.2,
+                label='p=0.05 threshold')
+    ax2.set_ylabel('-log10(DeLong p-value)', fontsize=12, color='#b45309')
+    ax2.tick_params(axis='y', labelcolor='#b45309')
+
+    lines1, labels1 = ax1.get_legend_handles_labels()
+    lines2, labels2 = ax2.get_legend_handles_labels()
+    ax1.legend(lines1 + lines2, labels1 + labels2,
+               loc='lower right', fontsize=9)
+    plt.title(f'Feature Ablation + DeLong Test — {mn} (★ Optimal={on})',
+              fontsize=14, fontweight='bold')
+    plt.tight_layout()
+    for ext,dpi in [('pdf',300),('png',150)]:
+        plt.savefig(os.path.join(rf,f'ablation_{mn}.{ext}'),
+                    format=ext,bbox_inches='tight',dpi=dpi)
+    plt.close()
 
 
 # ============================================================================
-# [FIX-1] Model configs — XGBoost num_class constructed conditionally
+# Model configs
 # ============================================================================
-ALL_MODEL_NAMES = ['RF', 'DT', 'KNN', 'XGB', 'AdaBoost', 'LR', 'NB', 'SVM']
+ALL_MODEL_NAMES = ['RF','DT','KNN','XGB','AdaBoost','LR','NB','SVM']
 
 def get_models_config(selected, n_classes, rs=42):
-    xgb_kwargs = dict(random_state=rs, eval_metric='mlogloss', n_jobs=-1)
+    xgb_kw = dict(random_state=rs, eval_metric='mlogloss', n_jobs=-1)
     if n_classes > 2:
-        xgb_kwargs['objective']  = 'multi:softprob'
-        xgb_kwargs['num_class']  = n_classes
+        xgb_kw.update(objective='multi:softprob', num_class=n_classes)
     else:
-        xgb_kwargs['objective']  = 'binary:logistic'
-        xgb_kwargs['eval_metric'] = 'logloss'
-
+        xgb_kw.update(objective='binary:logistic', eval_metric='logloss')
     cfg = {
-        'RF':       {'model': RandomForestClassifier(random_state=rs, n_jobs=-1),
-                     'params': {'n_estimators': [100,200], 'max_depth': [20,50],
-                                'min_samples_split': [2,5]}},
-        'DT':       {'model': DecisionTreeClassifier(random_state=rs),
-                     'params': {'max_depth': [20,50], 'min_samples_split': [2,10],
-                                'criterion': ['gini','entropy']}},
-        'KNN':      {'model': KNeighborsClassifier(n_jobs=-1),
-                     'params': {'n_neighbors': [3,5,7],
-                                'weights': ['uniform','distance']}},
-        'XGB':      {'model': XGBClassifier(**xgb_kwargs),
-                     'params': {'n_estimators': [100,200], 'max_depth': [5,7],
-                                'learning_rate': [0.05,0.1]}},
-        'AdaBoost': {'model': AdaBoostClassifier(random_state=rs),
-                     'params': {'n_estimators': [50,100],
-                                'learning_rate': [0.1,0.5,1.0]}},
-        'LR':       {'model': LogisticRegression(random_state=rs, n_jobs=-1,
-                                                  max_iter=2000),
-                     'params': {'C': [0.1,1,10], 'solver': ['lbfgs']}},
-        'NB':       {'model': GaussianNB(),
-                     'params': {'var_smoothing': [1e-9,1e-7,1e-5]}},
-        'SVM':      {'model': SVC(probability=True, random_state=rs,
-                                   decision_function_shape='ovr'),
-                     'params': {'C': [1,10], 'kernel': ['rbf','linear']}},
+        'RF':{'model':RandomForestClassifier(random_state=rs,n_jobs=-1),
+              'params':{'n_estimators':[100,200],'max_depth':[20,50],'min_samples_split':[2,5]}},
+        'DT':{'model':DecisionTreeClassifier(random_state=rs),
+              'params':{'max_depth':[20,50],'min_samples_split':[2,10],'criterion':['gini','entropy']}},
+        'KNN':{'model':KNeighborsClassifier(n_jobs=-1),
+               'params':{'n_neighbors':[3,5,7],'weights':['uniform','distance']}},
+        'XGB':{'model':XGBClassifier(**xgb_kw),
+               'params':{'n_estimators':[100,200],'max_depth':[5,7],'learning_rate':[0.05,0.1]}},
+        'AdaBoost':{'model':AdaBoostClassifier(random_state=rs),
+                    'params':{'n_estimators':[50,100],'learning_rate':[0.1,0.5,1.0]}},
+        'LR':{'model':LogisticRegression(random_state=rs,n_jobs=-1,max_iter=2000),
+              'params':{'C':[0.1,1,10],'solver':['lbfgs']}},
+        'NB':{'model':GaussianNB(),
+              'params':{'var_smoothing':[1e-9,1e-7,1e-5]}},
+        'SVM':{'model':SVC(probability=True,random_state=rs,decision_function_shape='ovr'),
+               'params':{'C':[1,10],'kernel':['rbf','linear']}},
     }
-    return {k: v for k, v in cfg.items() if k in selected}
+    return {k:v for k,v in cfg.items() if k in selected}
 
 
 # ============================================================================
@@ -524,833 +602,609 @@ def get_models_config(selected, n_classes, rs=42):
 # ============================================================================
 def run_pipeline(
     train_file, val_file1, val_file2, val_file3, n_classes_select,
-    selected_models, enable_tuning,
-    cv_folds, top_n_features, shap_sample_size,
+    selected_models, enable_tuning, cv_folds, top_n_features, shap_sample_size,
     progress=gr.Progress(track_tqdm=True),
 ):
     if train_file is None:
         return None, "❌ 请先上传训练集 CSV 文件"
-    sel = (selected_models if isinstance(selected_models, list)
+    sel = (selected_models if isinstance(selected_models,list)
            else [s.strip() for s in str(selected_models).split(",") if s.strip()])
     if not sel:
         return None, "❌ 请至少选择一个模型"
 
-    RS = 42; CVF = int(cv_folds)
-    TOPN = int(top_n_features); SHAPSZ = int(shap_sample_size)
-    TUNING = bool(enable_tuning)
+    RS=42; CVF=int(cv_folds); TOPN=int(top_n_features)
+    SHAPSZ=int(shap_sample_size); TUNING=bool(enable_tuning); ALPHA=0.05
 
-    L = []
-    def log(m): L.append(str(m))
-
-    rf = tempfile.mkdtemp(prefix="ml_")
+    L=[]; log=lambda m: L.append(str(m))
+    rf=tempfile.mkdtemp(prefix="ml_")
 
     try:
-        # ── Load Data ──
+        # ── Load ──────────────────────────────────────────────────────────
         progress(0.02, desc="📂 加载数据...")
-        log("━" * 60)
-        log("  🧬 ML 多分类模型训练与评估系统  v3")
-        log("━" * 60)
+        log("━"*60); log("  🧬 ML 多分类模型训练与评估系统  v4"); log("━"*60)
 
-        tp = (train_file if isinstance(train_file, str)
-              else getattr(train_file, 'name', str(train_file)))
+        tp = train_file if isinstance(train_file,str) else getattr(train_file,'name',str(train_file))
         data = pd.read_csv(tp)
-
-        y = data.iloc[:, 0]
-        col2 = data.iloc[:, 1]
-        col2_is_id = ((col2.dtype == 'object') or
-                      (col2.nunique() / len(col2) > 0.5))
-        if col2_is_id:
-            X = data.iloc[:, 2:]
-            log(f"  📋 CSV: Col1=Label, Col2=ID({data.columns[1]}), Col3+=Features")
-        else:
-            X = data.iloc[:, 1:]
-            log(f"  📋 CSV: Col1=Label, Col2+=Features (no ID column)")
+        y    = data.iloc[:,0]
+        col2 = data.iloc[:,1]
+        col2_is_id = (col2.dtype=='object') or (col2.nunique()/len(col2)>0.5)
+        X      = data.iloc[:,2:] if col2_is_id else data.iloc[:,1:]
         fnames = X.columns.tolist()
+        log(f"  📋 CSV: Col1=Label, {'Col2=ID, Col3+=Features' if col2_is_id else 'Col2+=Features'}")
 
-        user_n = int(str(n_classes_select).split(" ")[0])
+        user_n          = int(str(n_classes_select).split()[0])
         detected_classes = sorted(y.unique())
-        detected_classes = [int(c) if hasattr(c, 'item') else c
-                            for c in detected_classes]
-        detected_n = len(detected_classes)
-
-        if detected_n != user_n:
-            return None, (
-                f"❌ 您选择了 {user_n} 分类，但数据中检测到 {detected_n} 个类别: "
-                f"{detected_classes}\n请将分类数修改为 {detected_n}，或检查数据标签列")
-
-        classes = detected_classes
-        n_classes = user_n
-        log(f"  ✅ {n_classes} 分类 — 数据验证通过")
-
-        label_map     = {c: i for i, c in enumerate(classes)}
-        label_map_inv = {i: c for c, i in label_map.items()}
-        y_mapped      = y.map(label_map)
-        class_indices = list(range(n_classes))
-
-        log(f"  📊 训练集: {X.shape[0]} 样本 × {X.shape[1]} 特征")
-        log(f"  🏷️ 类别数: {n_classes} 类 — {classes}")
-        log(f"  📊 分布: {dict(y.value_counts().sort_index())}")
-        log(f"  🤖 模型: {', '.join(sel)}")
-        log(f"  🔧 调优: {'开启' if TUNING else '关闭'}  |  CV: {CVF}折")
-
-        if n_classes < 2 or n_classes > 8:
-            return None, f"❌ 仅支持 2~8 分类，当前检测到 {n_classes} 类"
-
-        task_type    = "Binary" if n_classes == 2 else f"{n_classes}-Class"
-        task_type_cn = "二分类" if n_classes == 2 else f"{n_classes}分类"
+        detected_classes = [int(c) if hasattr(c,'item') else c for c in detected_classes]
+        if len(detected_classes) != user_n:
+            return None,(f"❌ 选择 {user_n} 分类但检测到 {len(detected_classes)} 类: "
+                         f"{detected_classes}")
+
+        classes      = detected_classes; n_classes=user_n
+        label_map    = {c:i for i,c in enumerate(classes)}
+        y_mapped     = y.map(label_map)
+        class_indices= list(range(n_classes))
+        task_type    = "Binary" if n_classes==2 else f"{n_classes}-Class"
+        task_type_cn = "二分类" if n_classes==2 else f"{n_classes}分类"
+
+        log(f"  ✅ {n_classes} 分类  |  {X.shape[0]} 样本 × {X.shape[1]} 特征")
+        log(f"  🏷️ 类别: {classes}  分布: {dict(y.value_counts().sort_index())}")
+        log(f"  🤖 模型: {', '.join(sel)}  调优: {'开启' if TUNING else '关闭'}  CV: {CVF}折")
         log(f"  📋 任务: {task_type_cn} ({task_type})")
 
+        if n_classes<2 or n_classes>8:
+            return None,f"❌ 仅支持 2~8 分类，检测到 {n_classes} 类"
+
         mcfg = get_models_config(sel, n_classes, RS)
         skf  = StratifiedKFold(n_splits=CVF, shuffle=True, random_state=RS)
 
-        COLORS = ['#2563eb','#f59e0b','#10b981','#ef4444',
-                  '#8b5cf6','#ec4899','#06b6d4','#6b7280']
-
-        bpd = {}          # best params
-        amr = {}          # all model results (CV-OOF)
-        tms = {}          # trained models (full data)
-        train_results = {}  # metrics on full training set
-
-        total = len(mcfg)
+        bpd={}; amr={}; tms={}; train_results={}
+        total=len(mcfg)
 
         # ── Train All Models ──────────────────────────────────────────────
-        for mi, (mn, cf) in enumerate(mcfg.items()):
-            pv = 0.05 + 0.35 * mi / total
-            progress(pv, desc=f"🏋️ [{mi+1}/{total}] 训练 {mn}...")
-            log(f"\n{'─'*50}")
-            log(f"  🔄 [{mi+1}/{total}] {mn}")
-
-            Xv = X.values
+        for mi,(mn,cf) in enumerate(mcfg.items()):
+            progress(0.05+0.32*mi/total, desc=f"🏋️ [{mi+1}/{total}] 训练 {mn}...")
+            log(f"\n{'─'*50}"); log(f"  🔄 [{mi+1}/{total}] {mn}")
+            Xv=X.values
 
-            # Optional GridSearch
             if TUNING:
                 log(f"     ⏳ GridSearchCV (CV={CVF})...")
-                scoring = 'roc_auc_ovr' if n_classes > 2 else 'roc_auc'
-                gs = GridSearchCV(cf['model'], cf['params'], cv=skf,
-                                  scoring=scoring, n_jobs=-1, verbose=0)
-                gs.fit(Xv, y_mapped)
-                bp = gs.best_params_; bpd[mn] = bp
+                scoring='roc_auc_ovr' if n_classes>2 else 'roc_auc'
+                gs=GridSearchCV(cf['model'],cf['params'],cv=skf,
+                                scoring=scoring,n_jobs=-1,verbose=0)
+                gs.fit(Xv,y_mapped); bp=gs.best_params_; bpd[mn]=bp
                 log(f"     ✓ 最佳CV Score: {gs.best_score_:.4f}")
             else:
-                bp = {}; bpd[mn] = "Default"
+                bp={}; bpd[mn]="Default"
 
-            # Fit final model on full training set
-            mdl = deepcopy(cf['model'])
+            mdl=deepcopy(cf['model'])
             if bp: mdl.set_params(**bp)
-            mdl.fit(Xv, y_mapped)
-            tms[mn] = mdl
+            mdl.fit(Xv,y_mapped); tms[mn]=mdl
 
             # Training-set metrics
-            train_proba_full = mdl.predict_proba(Xv)
-            train_pred_full  = mdl.predict(Xv)
-            train_met = compute_multiclass_metrics(
-                y_mapped.values, train_pred_full,
-                train_proba_full, class_indices)
-            train_results[mn] = {
-                'proba': train_proba_full,
-                'pred':  train_pred_full,
-                'metrics': train_met,
-            }
-
-            # ── CV evaluation (OOF = Internal Validation) ──
-            all_yt = []; all_yp = []; all_yproba = []
-            fold_metrics = []
-
-            for fi, (tri, tei) in enumerate(skf.split(X, y_mapped), 1):
-                Xtr, Xte = X.iloc[tri].values, X.iloc[tei].values
-                ytr, yte = y_mapped.iloc[tri], y_mapped.iloc[tei]
-                mf = deepcopy(cf['model'])
+            tp_full=mdl.predict_proba(Xv); td_full=mdl.predict(Xv)
+            tm=compute_multiclass_metrics(y_mapped.values,td_full,tp_full,class_indices)
+            train_results[mn]={'proba':tp_full,'pred':td_full,'metrics':tm}
+
+            # CV-OOF
+            all_yt=[]; all_yp=[]; all_yproba=[]; fold_metrics=[]
+            for fi,(tri,tei) in enumerate(skf.split(X,y_mapped),1):
+                mf=deepcopy(cf['model'])
                 if bp: mf.set_params(**bp)
-                mf.fit(Xtr, ytr)
-                ypred  = mf.predict(Xte)
-                yproba = mf.predict_proba(Xte)
-                all_yt.extend(yte)
-                all_yp.extend(ypred)
+                mf.fit(X.iloc[tri].values,y_mapped.iloc[tri])
+                ypred=mf.predict(X.iloc[tei].values)
+                yproba=mf.predict_proba(X.iloc[tei].values)
+                all_yt.extend(y_mapped.iloc[tei]); all_yp.extend(ypred)
                 all_yproba.append(yproba)
-
-                fm = compute_multiclass_metrics(yte, ypred, yproba, class_indices)
-                # [v3-2] Extended fold row
-                fold_metrics.append({
-                    'Fold':        fi,
-                    'AUC':         fm['Macro_AUC'],
-                    'Accuracy':    fm['Accuracy'],
-                    'Sensitivity': fm['Macro_Sensitivity'],
-                    'Specificity': fm['Macro_Specificity'],
-                    'PPV':         fm['Macro_PPV'],
-                    'NPV':         fm['Macro_NPV'],
-                    'F1':          fm['Macro_F1'],
-                    'Weighted_F1': fm['Weighted_F1'],
-                    'Kappa':       fm['Kappa'],
-                })
-
-            all_yt     = np.array(all_yt)
-            all_yp     = np.array(all_yp)
-            all_yproba = np.vstack(all_yproba)
-
-            # Build fold table with Mean row
-            fdf = pd.DataFrame(fold_metrics)
-            mean_row = {
-                col: (fdf[col].mean() if col != 'Fold' else 'Mean')
-                for col in fdf.columns
+                fm=compute_multiclass_metrics(y_mapped.iloc[tei],ypred,yproba,class_indices)
+                fold_metrics.append({'Fold':fi,'AUC':fm['Macro_AUC'],
+                    'Accuracy':fm['Accuracy'],'Sensitivity':fm['Macro_Sensitivity'],
+                    'Specificity':fm['Macro_Specificity'],'PPV':fm['Macro_PPV'],
+                    'NPV':fm['Macro_NPV'],'F1':fm['Macro_F1'],
+                    'Weighted_F1':fm['Weighted_F1'],'Kappa':fm['Kappa']})
+
+            all_yt=np.array(all_yt); all_yp=np.array(all_yp)
+            all_yproba=np.vstack(all_yproba)
+            fdf=pd.DataFrame(fold_metrics)
+            mr={c:(fdf[c].mean() if c!='Fold' else 'Mean') for c in fdf.columns}
+            fdf=pd.concat([fdf,pd.DataFrame([mr])],ignore_index=True)
+            oof_met=compute_multiclass_metrics(all_yt,all_yp,all_yproba,class_indices)
+
+            amr[mn]={
+                'fold_df':fdf,
+                'mean_auc':mr['AUC'],'mean_acc':mr['Accuracy'],
+                'mean_sens':mr['Sensitivity'],'mean_spec':mr['Specificity'],
+                'mean_ppv':mr['PPV'],'mean_npv':mr['NPV'],
+                'mean_f1':mr['F1'],'mean_wf1':mr['Weighted_F1'],
+                'mean_kappa':mr['Kappa'],
+                'oof_metrics':oof_met,
+                'all_yt':all_yt,'all_yp':all_yp,'all_yproba':all_yproba,
             }
-            fdf = pd.concat([fdf, pd.DataFrame([mean_row])], ignore_index=True)
-
-            # OOF aggregate metrics (computed on concatenated OOF predictions)
-            oof_met = compute_multiclass_metrics(
-                all_yt, all_yp, all_yproba, class_indices)
-
-            amr[mn] = {
-                'fold_df':          fdf,
-                'mean_auc':         mean_row['AUC'],
-                'mean_acc':         mean_row['Accuracy'],
-                'mean_sens':        mean_row['Sensitivity'],
-                'mean_spec':        mean_row['Specificity'],
-                'mean_ppv':         mean_row['PPV'],
-                'mean_npv':         mean_row['NPV'],
-                'mean_f1':          mean_row['F1'],
-                'mean_wf1':         mean_row['Weighted_F1'],
-                'mean_kappa':       mean_row['Kappa'],
-                'oof_metrics':      oof_met,
-                'all_yt':           all_yt,
-                'all_yp':           all_yp,
-                'all_yproba':       all_yproba,
-            }
-
-            # [v3-7] Log all key metrics
-            tm = train_met; vm = mean_row
             log(f"     ✅ [Train]  AUC={tm['Macro_AUC']:.4f}  Acc={tm['Accuracy']:.4f}  "
                 f"Sens={tm['Macro_Sensitivity']:.4f}  Spec={tm['Macro_Specificity']:.4f}  "
                 f"PPV={tm['Macro_PPV']:.4f}  NPV={tm['Macro_NPV']:.4f}  "
                 f"F1={tm['Macro_F1']:.4f}  Kappa={tm['Kappa']:.4f}")
-            log(f"     ✅ [CV-OOF] AUC={vm['AUC']:.4f}  Acc={vm['Accuracy']:.4f}  "
-                f"Sens={vm['Sensitivity']:.4f}  Spec={vm['Specificity']:.4f}  "
-                f"PPV={vm['PPV']:.4f}  NPV={vm['NPV']:.4f}  "
-                f"F1={vm['F1']:.4f}  Kappa={vm['Kappa']:.4f}")
+            log(f"     ✅ [CV-OOF] AUC={mr['AUC']:.4f}  Acc={mr['Accuracy']:.4f}  "
+                f"Sens={mr['Sensitivity']:.4f}  Spec={mr['Specificity']:.4f}  "
+                f"PPV={mr['PPV']:.4f}  NPV={mr['NPV']:.4f}  "
+                f"F1={mr['F1']:.4f}  Kappa={mr['Kappa']:.4f}")
 
-        mnames = list(amr.keys()); nm = len(mnames)
-        log(f"\n{'━'*60}")
-        log(f"  ✅ {nm} 个模型训练完成")
+        mnames=list(amr.keys()); nm=len(mnames)
+        log(f"\n{'━'*60}"); log(f"  ✅ {nm} 个模型训练完成")
 
-        # ── Training-set ROC / PR / CM for every model ───────────────────
-        progress(0.40, desc="📈 训练集曲线...")
-        log(f"\n  📈 绘制训练集 ROC / PR / CM...")
+        # ── Training-set plots ────────────────────────────────────────────
+        progress(0.39, desc="📈 训练集曲线...")
+        log(f"\n  📈 绘制训练集 ROC/PR/CM...")
         for mn in mnames:
-            tr = train_results[mn]
-            tm = tr['metrics']
-            plot_multiclass_roc(
-                y_mapped.values, tr['proba'], class_indices,
+            tr=train_results[mn]; tm=tr['metrics']
+            plot_multiclass_roc(y_mapped.values,tr['proba'],class_indices,
                 f'ROC (Train) — {mn} ({task_type}, AUC={tm["Macro_AUC"]:.3f})',
-                f'roc_train_{mn}', rf)
-            plot_multiclass_pr(
-                y_mapped.values, tr['proba'], class_indices,
-                f'PR (Train) — {mn} ({task_type})',
-                f'pr_train_{mn}', rf)
-            plot_confusion_matrix(
-                y_mapped.values, tr['pred'], class_indices,
-                f'CM (Train) — {mn} (Acc={tm["Accuracy"]:.3f})',
-                f'cm_train_{mn}', rf)
-
-        # Combined training-set ROC (all models, macro)
-        plt.figure(figsize=(10, 8))
-        for i, mn in enumerate(mnames):
-            tr = train_results[mn]
-            y_bin = label_binarize(y_mapped.values, classes=class_indices)
-            if n_classes == 2: y_bin = np.hstack([1 - y_bin, y_bin])
-            all_fpr = np.linspace(0, 1, 200); mean_tpr = np.zeros_like(all_fpr)
-            for c in range(n_classes):
-                f, t, _ = roc_curve(y_bin[:, c], tr['proba'][:, c])
-                mean_tpr += np.interp(all_fpr, f, t)
-            mean_tpr /= n_classes; mean_tpr[-1] = 1.0
-            ma = auc_score(all_fpr, mean_tpr)
-            plt.plot(all_fpr, mean_tpr, color=COLORS[i%8], lw=2.5,
-                     label=f'{mn} (Macro AUC={ma:.3f})')
+                f'roc_train_{mn}',rf)
+            plot_multiclass_pr(y_mapped.values,tr['proba'],class_indices,
+                f'PR (Train) — {mn} ({task_type})',f'pr_train_{mn}',rf)
+            plot_confusion_matrix(y_mapped.values,tr['pred'],class_indices,
+                f'CM (Train) — {mn} (Acc={tm["Accuracy"]:.3f})',f'cm_train_{mn}',rf)
+
+        # Combined training ROC
+        plt.figure(figsize=(10,8))
+        for i,mn in enumerate(mnames):
+            tr=train_results[mn]
+            fpr,tpr,ma=_macro_roc_curve(y_mapped.values,tr['proba'],class_indices)
+            plt.plot(fpr,tpr,color=MODEL_COLORS[i%8],lw=2.5,label=f'{mn} (AUC={ma:.3f})')
         plt.plot([0,1],[0,1],'--',color='#ccc',lw=1)
         plt.xlim([-0.02,1.02]); plt.ylim([-0.02,1.02])
         plt.xlabel('FPR',fontsize=13); plt.ylabel('TPR',fontsize=13)
-        plt.title(f'ROC (Train) — All Models ({task_type})',
-                  fontsize=14, fontweight='bold')
-        plt.legend(loc='lower right',fontsize=10)
-        plt.grid(True,alpha=0.15); plt.tight_layout()
-        plt.savefig(os.path.join(rf,'roc_train_all.pdf'),
-                    format='pdf',bbox_inches='tight',dpi=300)
-        plt.savefig(os.path.join(rf,'roc_train_all.png'),
-                    format='png',bbox_inches='tight',dpi=150)
+        plt.title(f'ROC (Train) — All Models ({task_type})',fontsize=14,fontweight='bold')
+        plt.legend(loc='lower right',fontsize=10); plt.grid(True,alpha=0.15); plt.tight_layout()
+        for ext,dpi in [('pdf',300),('png',150)]:
+            plt.savefig(os.path.join(rf,f'roc_train_all.{ext}'),format=ext,bbox_inches='tight',dpi=dpi)
         plt.close()
 
-        # ── CV-OOF ROC / PR / CM ─────────────────────────────────────────
-        progress(0.44, desc="📈 内部验证ROC曲线...")
-        log(f"\n  📈 绘制内部验证(CV-OOF) ROC / PR / CM...")
+        # ── CV-OOF plots ─────────────────────────────────────────────────
+        progress(0.43, desc="📈 内部验证曲线...")
+        log(f"\n  📈 绘制CV-OOF ROC/PR/CM...")
         for mn in mnames:
-            r = amr[mn]
-            plot_multiclass_roc(
-                r['all_yt'], r['all_yproba'], class_indices,
+            r=amr[mn]
+            plot_multiclass_roc(r['all_yt'],r['all_yproba'],class_indices,
                 f'ROC (Internal Val) — {mn} ({task_type}, AUC={r["mean_auc"]:.3f})',
-                f'roc_val_{mn}', rf)
-
-        # Combined CV-OOF ROC
-        plt.figure(figsize=(10, 8))
-        for i, mn in enumerate(mnames):
-            r = amr[mn]
-            y_bin = label_binarize(r['all_yt'], classes=class_indices)
-            if n_classes == 2: y_bin = np.hstack([1 - y_bin, y_bin])
-            all_fpr = np.linspace(0, 1, 200); mean_tpr = np.zeros_like(all_fpr)
-            for c in range(n_classes):
-                f, t, _ = roc_curve(y_bin[:, c], r['all_yproba'][:, c])
-                mean_tpr += np.interp(all_fpr, f, t)
-            mean_tpr /= n_classes; mean_tpr[-1] = 1.0
-            ma = auc_score(all_fpr, mean_tpr)
-            plt.plot(all_fpr, mean_tpr, color=COLORS[i%8], lw=2.5,
-                     label=f'{mn} (Macro AUC={ma:.3f})')
+                f'roc_val_{mn}',rf)
+            plot_multiclass_pr(r['all_yt'],r['all_yproba'],class_indices,
+                f'PR (Internal Val) — {mn} ({task_type})',f'pr_val_{mn}',rf)
+            plot_confusion_matrix(r['all_yt'],r['all_yp'],class_indices,
+                f'CM (Internal Val) — {mn} (Acc={r["mean_acc"]:.3f})',f'cm_val_{mn}',rf)
+
+        # Combined val ROC
+        plt.figure(figsize=(10,8))
+        for i,mn in enumerate(mnames):
+            r=amr[mn]
+            fpr,tpr,ma=_macro_roc_curve(r['all_yt'],r['all_yproba'],class_indices)
+            plt.plot(fpr,tpr,color=MODEL_COLORS[i%8],lw=2.5,label=f'{mn} (AUC={ma:.3f})')
         plt.plot([0,1],[0,1],'--',color='#ccc',lw=1)
         plt.xlim([-0.02,1.02]); plt.ylim([-0.02,1.02])
         plt.xlabel('FPR',fontsize=13); plt.ylabel('TPR',fontsize=13)
-        plt.title(f'ROC (Internal Val / CV-OOF) — All Models ({task_type})',
-                  fontsize=14, fontweight='bold')
-        plt.legend(loc='lower right',fontsize=10)
-        plt.grid(True,alpha=0.15); plt.tight_layout()
-        plt.savefig(os.path.join(rf,'roc_val_all.pdf'),
-                    format='pdf',bbox_inches='tight',dpi=300)
-        plt.savefig(os.path.join(rf,'roc_val_all.png'),
-                    format='png',bbox_inches='tight',dpi=150)
+        plt.title(f'ROC (Internal Val) — All Models ({task_type})',fontsize=14,fontweight='bold')
+        plt.legend(loc='lower right',fontsize=10); plt.grid(True,alpha=0.15); plt.tight_layout()
+        for ext,dpi in [('pdf',300),('png',150)]:
+            plt.savefig(os.path.join(rf,f'roc_val_all.{ext}'),format=ext,bbox_inches='tight',dpi=dpi)
         plt.close()
 
-        progress(0.48, desc="📈 PR曲线...")
-        for mn in mnames:
-            r = amr[mn]
-            plot_multiclass_pr(
-                r['all_yt'], r['all_yproba'], class_indices,
-                f'PR (Internal Val) — {mn} ({task_type})',
-                f'pr_val_{mn}', rf)
+        # ── [v4-1/2] Step 1: DeLong test — all models vs best ────────────
+        progress(0.47, desc="🔬 Step1: DeLong模型筛选...")
+        ref_mn  = max(amr, key=lambda x: amr[x]['mean_auc'])
+        ref_auc = amr[ref_mn]['mean_auc']
+        log(f"\n{'━'*60}")
+        log(f"  🔬 Step 1 — DeLong检验 (参照模型: {ref_mn}, AUC={ref_auc:.4f})")
+        log(f"  α=0.05，p≥0.05 → 保留（与最佳无统计学差异）")
 
-        progress(0.51, desc="📊 混淆矩阵...")
-        for mn in mnames:
-            r = amr[mn]
-            plot_confusion_matrix(
-                r['all_yt'], r['all_yp'], class_indices,
-                f'CM (Internal Val) — {mn} (Acc={r["mean_acc"]:.3f})',
-                f'cm_val_{mn}', rf)
-
-        # ── Bootstrap AUC Test ────────────────────────────────────────────
-        progress(0.54, desc="🔬 Bootstrap AUC 检验...")
-        best_mn  = max(amr, key=lambda x: amr[x]['mean_auc'])
-        best_auc = amr[best_mn]['mean_auc']
-        log(f"\n  🏆 最佳模型: {best_mn} (Macro AUC={best_auc:.4f})")
-        log(f"  🔬 Bootstrap 检验 (n=2000, α=0.05)...")
-
-        ALPHA = 0.05
-        bootstrap_results = []
-        retained = [best_mn]
+        delong_step1_rows=[]; delong_retained=[ref_mn]
 
         for om in mnames:
-            if om == best_mn:
+            if om==ref_mn:
+                delong_step1_rows.append({
+                    'Model':om,'AUC':amr[om]['mean_auc'],
+                    'vs_Best':'-','AUC_Diff':0,
+                    'CI_95_Low':0,'CI_95_High':0,
+                    'Z_score':0,'P_value':1.0,
+                    'Decision':'Best (reference)',
+                })
                 continue
-            p_val, auc_a, auc_b, ci_lo, ci_hi = bootstrap_auc_test(
-                amr[best_mn]['all_yt'],
-                amr[best_mn]['all_yproba'],
+            p,a_ref,a_om,ci_lo,ci_hi,z = delong_test(
+                amr[ref_mn]['all_yt'],
+                amr[ref_mn]['all_yproba'],
                 amr[om]['all_yproba'],
                 class_indices, n_bootstrap=2000)
-            dec = "Retained" if p_val >= ALPHA else "Excluded"
-            if p_val >= ALPHA:
-                retained.append(om)
-            bootstrap_results.append({
-                'Model_A':    best_mn, 'AUC_A': auc_a,
-                'Model_B':    om,      'AUC_B': auc_b,
-                'AUC_Diff':   auc_a - auc_b,
-                'CI_95_Low':  ci_lo, 'CI_95_High': ci_hi,
-                'P_value':    p_val, 'Decision': dec,
+            dec = "Retained" if p>=ALPHA else "Excluded"
+            if p>=ALPHA: delong_retained.append(om)
+            delong_step1_rows.append({
+                'Model':om,'AUC':amr[om]['mean_auc'],
+                'vs_Best':ref_mn,'AUC_Diff':a_ref-a_om,
+                'CI_95_Low':ci_lo,'CI_95_High':ci_hi,
+                'Z_score':z,'P_value':p,'Decision':dec,
             })
-            log(f"     {best_mn} vs {om}: ΔAUC={auc_a-auc_b:+.4f}  "
+            log(f"     {ref_mn} vs {om}: ΔAUC={a_ref-a_om:+.4f}  "
                 f"95%CI=[{ci_lo:+.4f},{ci_hi:+.4f}]  "
-                f"P={p_val:.4f} → {dec}")
-
-        bootstrap_df = (pd.DataFrame(bootstrap_results)
-                        .sort_values('P_value', ascending=False)
-                        if bootstrap_results else pd.DataFrame())
-        log(f"  ✅ 保留 {len(retained)}/{nm} 个模型: {', '.join(retained)}")
-
-        # ── Best model: Train vs Internal Val overlay ─────────────────────
-        progress(0.57, desc="📈 Train vs Val 对比图...")
-        log(f"\n  📈 最佳模型 {best_mn}: Train vs Internal Validation 对比...")
-        auc_tr_b, auc_vl_b = plot_train_vs_val_roc(
-            y_mapped.values,  train_results[best_mn]['proba'],
-            amr[best_mn]['all_yt'], amr[best_mn]['all_yproba'],
-            class_indices, best_mn, f'roc_train_vs_val_{best_mn}', rf)
-        ap_tr_b, ap_vl_b = plot_train_vs_val_pr(
-            y_mapped.values,  train_results[best_mn]['proba'],
-            amr[best_mn]['all_yt'], amr[best_mn]['all_yproba'],
-            class_indices, best_mn, f'pr_train_vs_val_{best_mn}', rf)
-        log(f"     ROC — Train AUC={auc_tr_b:.4f} / Val AUC={auc_vl_b:.4f}")
-        log(f"     PR  — Train AP={ap_tr_b:.4f}   / Val AP={ap_vl_b:.4f}")
-
-        # ── SHAP ──────────────────────────────────────────────────────────
-        progress(0.60, desc="🔥 SHAP分析...")
-        log(f"\n  🔥 SHAP特征分析 (保留模型中 Top 3)...")
-        shap_imp = {}
-        models_for_shap = sorted(retained,
-                                  key=lambda x: amr[x]['mean_auc'],
-                                  reverse=True)[:3]
-
-        for si, mn in enumerate(models_for_shap):
-            progress(0.60 + 0.10 * si / max(len(models_for_shap), 1),
+                f"Z={z:+.3f}  P={p:.4f} → {dec}")
+
+        delong_step1_df = (pd.DataFrame(delong_step1_rows)
+                           .sort_values('AUC',ascending=False))
+        log(f"  ✅ Step1保留 {len(delong_retained)}/{nm} 个模型: "
+            f"{', '.join(delong_retained)}")
+
+        # Train vs Val overlay for the Step-1 reference model (ref_mn); the final model (best_mn) is chosen later in Step 3
+        progress(0.50, desc="📈 Train vs Val 对比...")
+        log(f"\n  📈 参照模型 {ref_mn}: Train vs Internal Validation...")
+        auc_tr_r,auc_vl_r=plot_train_vs_val_roc(
+            y_mapped.values,train_results[ref_mn]['proba'],
+            amr[ref_mn]['all_yt'],amr[ref_mn]['all_yproba'],
+            class_indices,ref_mn,f'roc_train_vs_val_{ref_mn}',rf)
+        ap_tr_r,ap_vl_r=plot_train_vs_val_pr(
+            y_mapped.values,train_results[ref_mn]['proba'],
+            amr[ref_mn]['all_yt'],amr[ref_mn]['all_yproba'],
+            class_indices,ref_mn,f'pr_train_vs_val_{ref_mn}',rf)
+        log(f"     ROC Train={auc_tr_r:.4f} / Val={auc_vl_r:.4f}")
+        log(f"     PR  Train={ap_tr_r:.4f}  / Val={ap_vl_r:.4f}")
+
+        # ── [v4-3] SHAP for all delong_retained models ───────────────────
+        progress(0.53, desc="🔥 SHAP分析...")
+        log(f"\n  🔥 SHAP — 对所有保留模型计算特征重要性...")
+        shap_imp={}
+        for si,mn in enumerate(delong_retained):
+            progress(0.53+0.08*si/max(len(delong_retained),1),
                      desc=f"🔥 SHAP: {mn}...")
-            mo = tms[mn]; Xshap = X.values
-            ns = min(SHAPSZ, Xshap.shape[0])
-            np.random.seed(RS)
-            sidx = np.random.choice(Xshap.shape[0], ns, replace=False)
-            Xs = Xshap[sidx]
             try:
-                if mn in ['RF', 'XGB', 'DT', 'AdaBoost']:
-                    exp = shap.TreeExplainer(mo)
-                    sv  = exp.shap_values(Xs)
-                else:
-                    bg  = Xs[np.random.choice(ns, min(50, ns), replace=False)]
-                    exp = shap.KernelExplainer(
-                        lambda x, m=mo: m.predict_proba(x), bg)
-                    sv  = exp.shap_values(Xs)
-
-                # [FIX-3] Robust SHAP shape handling
-                if isinstance(sv, list):
-                    sv_abs = np.mean([np.abs(s) for s in sv], axis=0)
-                elif sv.ndim == 3:
-                    if sv.shape[2] == n_classes:
-                        sv_abs = np.mean(np.abs(sv), axis=2)
-                    elif sv.shape[1] == n_classes:
-                        sv_abs = np.mean(np.abs(sv), axis=1)
-                    else:
-                        sv_abs = np.abs(sv).mean(axis=-1)
-                else:
-                    sv_abs = np.abs(sv)
-
-                fi = sv_abs.mean(axis=0)
-                if len(fi) > len(fnames): fi = fi[:len(fnames)]
-                elif len(fi) < len(fnames):
-                    fi = np.pad(fi, (0, len(fnames) - len(fi)))
-
-                idf = (pd.DataFrame({'Feature': fnames, 'Importance': fi})
-                       .sort_values('Importance', ascending=False))
-                shap_imp[mn] = idf
-
-                plt.figure(figsize=(10, max(6, TOPN * 0.3)))
-                top_df = idf.head(TOPN).iloc[::-1]
-                plt.barh(top_df['Feature'], top_df['Importance'],
-                         color='#2563eb', alpha=0.8)
-                plt.xlabel('Mean |SHAP|', fontsize=12)
+                idf=run_shap_for_model(mn,tms[mn],X,fnames,n_classes,SHAPSZ,RS)
+                shap_imp[mn]=idf
+                plt.figure(figsize=(10,max(6,TOPN*0.3)))
+                top_df=idf.head(TOPN).iloc[::-1]
+                plt.barh(top_df['Feature'],top_df['Importance'],color='#2563eb',alpha=0.8)
+                plt.xlabel('Mean |SHAP|',fontsize=12)
                 plt.title(f'SHAP Feature Importance — {mn} (Top {TOPN})',
-                          fontsize=13, fontweight='bold')
+                          fontsize=13,fontweight='bold')
                 plt.tight_layout()
-                plt.savefig(os.path.join(rf, f'shap_{mn}.pdf'),
-                            format='pdf', bbox_inches='tight')
-                plt.savefig(os.path.join(rf, f'shap_{mn}.png'),
-                            format='png', bbox_inches='tight', dpi=150)
+                for ext,dpi in [('pdf',300),('png',150)]:
+                    plt.savefig(os.path.join(rf,f'shap_{mn}.{ext}'),
+                                format=ext,bbox_inches='tight',dpi=dpi)
                 plt.close()
-                log(f"     ✅ {mn} Top3: "
-                    f"{', '.join(idf.head(3)['Feature'].tolist())}")
+                log(f"     ✅ {mn} Top3: {', '.join(idf.head(3)['Feature'].tolist())}")
             except Exception as e:
                 log(f"     ⚠ {mn} SHAP失败: {e}")
 
-        # ── Feature Ablation ──────────────────────────────────────────────
-        progress(0.72, desc="🧪 特征消融...")
-        log(f"\n  🧪 特征消融 (仅最佳模型 {best_mn})...")
-        ablation_data = None
-        if best_mn in shap_imp:
-            imp_df    = shap_imp[best_mn]
-            top_feats = imp_df.head(TOPN)['Feature'].tolist()
-            fcs = []; aucs_a = []
-
-            for nf in range(1, len(top_feats) + 1):
-                Xsub = X[top_feats[:nf]]
-                fold_aucs = []
-                for tri, tei in skf.split(Xsub, y_mapped):
-                    mf = deepcopy(mcfg[best_mn]['model'])
-                    bp2 = bpd.get(best_mn, {})
-                    if isinstance(bp2, dict) and bp2:
-                        mf.set_params(**bp2)
-                    mf.fit(Xsub.iloc[tri].values, y_mapped.iloc[tri])
-                    yproba_f = mf.predict_proba(Xsub.iloc[tei].values)
-                    yte_f    = y_mapped.iloc[tei]
-                    try:
-                        a = (roc_auc_score(yte_f, yproba_f[:, 1])
-                             if n_classes == 2 else
-                             roc_auc_score(yte_f, yproba_f,
-                                           multi_class='ovr', average='macro'))
-                    except:
-                        a = 0.0
-                    fold_aucs.append(a)
-                fcs.append(nf); aucs_a.append(np.mean(fold_aucs))
-
-            full_auc = amr[best_mn]['mean_auc']
-            opt_n = len(top_feats)
-            for i, a in enumerate(aucs_a):
-                if a >= full_auc * 0.95:
-                    opt_n = i + 1; break
-
-            ablation_data = {
-                'fcs': fcs, 'aucs': aucs_a, 'feats': top_feats,
-                'opt_n': opt_n, 'opt_feats': top_feats[:opt_n]
-            }
-            log(f"     ✅ 最优特征数: {opt_n} "
-                f"(AUC={aucs_a[opt_n-1]:.4f} vs Full={full_auc:.4f})")
-
-            plt.figure(figsize=(10, 7))
-            plt.plot(fcs, aucs_a, 'o-', color='#2563eb', lw=2, ms=5)
-            plt.scatter([opt_n], [aucs_a[opt_n-1]], s=200, marker='*',
-                        color='#ef4444', edgecolors='black', lw=2, zorder=5)
-            plt.axhline(y=full_auc, color='gray', ls='--', lw=1, alpha=0.5,
-                        label=f'Full AUC={full_auc:.3f}')
-            plt.xlabel('Number of Features', fontsize=13)
-            plt.ylabel('Macro AUC', fontsize=13)
-            plt.title(f'Feature Ablation — {best_mn} (★ Optimal={opt_n})',
-                      fontsize=14, fontweight='bold')
-            plt.legend(fontsize=11); plt.grid(True, alpha=0.15); plt.tight_layout()
-            plt.savefig(os.path.join(rf, 'ablation.pdf'),
-                        format='pdf', bbox_inches='tight')
-            plt.savefig(os.path.join(rf, 'ablation.png'),
-                        format='png', bbox_inches='tight', dpi=150)
-            plt.close()
+        # ── [v4-4] Step 2: Feature ablation with DeLong for each retained model
+        progress(0.62, desc="🧪 Step2: 特征消融 + DeLong检验...")
+        log(f"\n{'━'*60}")
+        log(f"  🧪 Step 2 — 特征消融 + DeLong检验（每个保留模型）")
+        log(f"  判定准则：首个 DeLong p≥0.05 的特征数 = 最优特征数")
+
+        ablation_results={}   # mn → ablation dict
+        all_delong_step2_rows=[]
+
+        models_with_shap=[mn for mn in delong_retained if mn in shap_imp]
+        for ai,mn in enumerate(models_with_shap):
+            progress(0.62+0.15*ai/max(len(models_with_shap),1),
+                     desc=f"🧪 消融: {mn}...")
+            top_feats=shap_imp[mn].head(TOPN)['Feature'].tolist()
+            log(f"\n  🔬 {mn} 消融中 (Top{TOPN} 特征)...")
+
+            res=run_ablation_for_model(
+                mn, mcfg, bpd, X, y_mapped, class_indices, n_classes,
+                top_feats, skf,
+                amr[mn]['all_yt'], amr[mn]['all_yproba'],
+                ALPHA=ALPHA, N_BOOT=2000)
+
+            ablation_results[mn]=res
+            all_delong_step2_rows.extend(res['delong_rows'])
+
+            log(f"     Full-feature AUC={res['full_auc']:.4f}")
+            for idx,nf in enumerate(res['fcs']):
+                sig='*' if res['pvals'][idx]<ALPHA else ' '
+                log(f"     N={nf:3d}: AUC={res['aucs'][idx]:.4f}  "
+                    f"p={res['pvals'][idx]:.4f}{sig}  "
+                    f"CI=[{res['ci_los'][idx]:+.4f},{res['ci_his'][idx]:+.4f}]")
+            log(f"     ★ 最优特征数: {res['opt_n']}  "
+                f"(AUC={res['opt_auc']:.4f}, p={res['pvals'][res['opt_n']-1]:.4f})")
+
+            # Individual ablation plot with DeLong p overlay
+            plot_single_ablation(mn, res, rf)
+
+        # Combined ablation figure
+        if ablation_results:
+            plot_combined_ablation(ablation_results, rf)
+
+        # ── [v4-5] Final model selection ──────────────────────────────────
+        log(f"\n{'━'*60}")
+        log(f"  🏆 Step 3 — 最终模型选择")
+        log(f"  规则: ① 最优特征数最少; ② 若相同则全特征OOF AUC最高")
+
+        selection_rows=[]
+        for mn in models_with_shap:
+            res=ablation_results[mn]
+            selection_rows.append({
+                'Model':       mn,
+                'Full_AUC':    res['full_auc'],
+                'Opt_N':       res['opt_n'],
+                'Opt_AUC':     res['opt_auc'],
+                'Opt_Features': ', '.join(res['opt_feats']),
+            })
+            log(f"     {mn}: opt_n={res['opt_n']}  "
+                f"opt_AUC={res['opt_auc']:.4f}  full_AUC={res['full_auc']:.4f}")
 
-        # ── External Validation ───────────────────────────────────────────
-        val_files_list = [vf for vf in [val_file1, val_file2, val_file3]
-                          if vf is not None]
-        final_feats = ablation_data['opt_feats'] if ablation_data else fnames
+        selection_df = pd.DataFrame(selection_rows)
+
+        if selection_df.empty:
+            # Fallback: use reference model with all features
+            best_mn   = ref_mn
+            final_feats = fnames
+            log(f"  ⚠ 无消融结果，回退到参照模型 {ref_mn}（全特征）")
+        else:
+            # Primary: smallest opt_n; secondary: highest full_AUC
+            selection_df_sorted = selection_df.sort_values(
+                ['Opt_N','Full_AUC'], ascending=[True,False])
+            winner = selection_df_sorted.iloc[0]
+            best_mn     = winner['Model']
+            final_feats = ablation_results[best_mn]['opt_feats']
+            log(f"\n  🥇 最终模型: {best_mn}")
+            log(f"     最优特征数: {winner['Opt_N']}")
+            log(f"     最优AUC:    {winner['Opt_AUC']:.4f}")
+            log(f"     全特征AUC:  {winner['Full_AUC']:.4f}")
+            log(f"     特征列表: {', '.join(final_feats)}")
 
+        # ── External Validation ───────────────────────────────────────────
+        val_files_list=[vf for vf in [val_file1,val_file2,val_file3] if vf is not None]
         if val_files_list:
-            progress(0.82, desc="🧪 外部验证...")
-            log(f"\n{'━'*60}")
-            log(f"  🧪 外部验证 ({len(val_files_list)} 个验证集)")
-
-            for vi, vf in enumerate(val_files_list, 1):
-                vp = (vf if isinstance(vf, str)
-                      else getattr(vf, 'name', str(vf)))
-                ed = pd.read_csv(vp); ye_raw = ed.iloc[:, 0]
-                vcol2       = ed.iloc[:, 1]
-                vcol2_is_id = ((vcol2.dtype == 'object') or
-                               (vcol2.nunique() / len(vcol2) > 0.5))
-                Xe = ed.iloc[:, 2:] if vcol2_is_id else ed.iloc[:, 1:]
-
-                ye = ye_raw.map(label_map)
+            progress(0.80, desc="🧪 外部验证...")
+            log(f"\n{'━'*60}"); log(f"  🧪 外部验证 ({len(val_files_list)} 个验证集)")
+            for vi,vf in enumerate(val_files_list,1):
+                vp=vf if isinstance(vf,str) else getattr(vf,'name',str(vf))
+                ed=pd.read_csv(vp); ye_raw=ed.iloc[:,0]
+                vcol2=ed.iloc[:,1]
+                Xe = ed.iloc[:,2:] if ((vcol2.dtype=='object') or
+                     (vcol2.nunique()/len(vcol2)>0.5)) else ed.iloc[:,1:]
+                ye=ye_raw.map(label_map)
                 if ye.isna().any():
-                    log(f"     ⚠ 验证集 {vi} 含有训练集中不存在的标签，已跳过")
-                    continue
-
-                log(f"\n  📊 验证集 {vi}: {Xe.shape[0]} 样本, "
-                    f"{os.path.basename(vp)}")
-
-                Xes = Xe[final_feats]; Xtf = X[final_feats]
-                fm = deepcopy(mcfg[best_mn]['model'])
-                bp3 = bpd[best_mn]
-                if isinstance(bp3, dict) and bp3:
-                    fm.set_params(**bp3)
-                fm.fit(Xtf.values, y_mapped)
-                yep    = fm.predict_proba(Xes.values)
-                yed    = fm.predict(Xes.values)
-                ye_np  = ye.values
-
-                ext_met = compute_multiclass_metrics(
-                    ye_np, yed, yep, class_indices)
-                em = ext_met
-                log(f"     ✅ AUC={em['Macro_AUC']:.4f}  "
-                    f"Acc={em['Accuracy']:.4f}  "
-                    f"Sens={em['Macro_Sensitivity']:.4f}  "
-                    f"Spec={em['Macro_Specificity']:.4f}  "
-                    f"PPV={em['Macro_PPV']:.4f}  "
-                    f"NPV={em['Macro_NPV']:.4f}  "
-                    f"F1={em['Macro_F1']:.4f}  "
-                    f"Kappa={em['Kappa']:.4f}")
-
-                sfx = f'_ext{vi}' if len(val_files_list) > 1 else '_ext'
-                tag = f'Validation {vi}' if len(val_files_list) > 1 else 'External'
-
-                plot_multiclass_roc(ye_np, yep, class_indices,
-                    f'ROC — {tag} ({best_mn})', f'roc{sfx}', rf)
-                plot_multiclass_pr(ye_np, yep, class_indices,
-                    f'PR — {tag} ({best_mn})', f'pr{sfx}', rf)
-                plot_confusion_matrix(ye_np, yed, class_indices,
-                    f'CM — {tag} ({best_mn})', f'cm{sfx}', rf)
-
-                # [v3-5] Extended external validation Excel
-                with pd.ExcelWriter(
-                    os.path.join(rf, f'validation{sfx}.xlsx'),
-                    engine='openpyxl'
-                ) as w:
-                    # Macro metrics row
-                    macro_row = {'Model': best_mn,
-                                 'N_Features': len(final_feats)}
-                    macro_row.update(metrics_to_flat_row(em))
-                    pd.DataFrame([macro_row]).to_excel(
-                        w, sheet_name='Metrics_Macro', index=False)
-                    # Per-class detail
-                    per_class_df(em, class_indices).to_excel(
-                        w, sheet_name='Metrics_PerClass', index=False)
-                    pd.DataFrame({'Feature': final_feats}).to_excel(
-                        w, sheet_name='Features', index=False)
+                    log(f"     ⚠ 验证集{vi}含未知标签，已跳过"); continue
+                log(f"\n  📊 验证集{vi}: {Xe.shape[0]} 样本, {os.path.basename(vp)}")
+                Xes=Xe[final_feats]; Xtf=X[final_feats]
+                fm=deepcopy(mcfg[best_mn]['model'])
+                bp3=bpd[best_mn]
+                if isinstance(bp3,dict) and bp3: fm.set_params(**bp3)
+                fm.fit(Xtf.values,y_mapped)
+                yep=fm.predict_proba(Xes.values); yed=fm.predict(Xes.values)
+                em=compute_multiclass_metrics(ye.values,yed,yep,class_indices)
+                log(f"     ✅ AUC={em['Macro_AUC']:.4f}  Acc={em['Accuracy']:.4f}  "
+                    f"Sens={em['Macro_Sensitivity']:.4f}  Spec={em['Macro_Specificity']:.4f}  "
+                    f"PPV={em['Macro_PPV']:.4f}  NPV={em['Macro_NPV']:.4f}  "
+                    f"F1={em['Macro_F1']:.4f}  Kappa={em['Kappa']:.4f}")
+                sfx=f'_ext{vi}' if len(val_files_list)>1 else '_ext'
+                tag=f'Validation {vi}' if len(val_files_list)>1 else 'External'
+                plot_multiclass_roc(ye.values,yep,class_indices,
+                    f'ROC — {tag} ({best_mn})',f'roc{sfx}',rf)
+                plot_multiclass_pr(ye.values,yep,class_indices,
+                    f'PR — {tag} ({best_mn})',f'pr{sfx}',rf)
+                plot_confusion_matrix(ye.values,yed,class_indices,
+                    f'CM — {tag} ({best_mn})',f'cm{sfx}',rf)
+                with pd.ExcelWriter(os.path.join(rf,f'validation{sfx}.xlsx'),
+                                    engine='openpyxl') as w:
+                    mr2={'Model':best_mn,'N_Features':len(final_feats)}
+                    mr2.update(metrics_to_flat_row(em))
+                    pd.DataFrame([mr2]).to_excel(w,sheet_name='Metrics_Macro',index=False)
+                    per_class_df(em,class_indices).to_excel(w,sheet_name='Metrics_PerClass',index=False)
+                    pd.DataFrame({'Feature':final_feats}).to_excel(w,sheet_name='Features',index=False)
 
         # ── Save Results ──────────────────────────────────────────────────
-        progress(0.92, desc="💾 保存结果...")
+        progress(0.90, desc="💾 保存结果...")
         log(f"\n  💾 保存结果...")
 
-        with pd.ExcelWriter(
-            os.path.join(rf, 'model_evaluation.xlsx'),
-            engine='openpyxl'
-        ) as w:
-
-            # 1. Per-fold CV results for every model  [v3-2 extended columns]
-            for mn, r in amr.items():
-                r['fold_df'].to_excel(w, sheet_name=mn, index=False)
-
-            # 2. Summary — Internal Validation (CV-OOF)  [v3-3 all metrics]
-            sd = []
-            for mn, r in amr.items():
-                row = {
-                    'Model':    mn,
-                    'Retained': 'Yes' if mn in retained else 'No',
-                    'Best':     'Best' if mn == best_mn else '',
-                }
-                row.update({
-                    'AUC':         r['mean_auc'],
-                    'Accuracy':    r['mean_acc'],
-                    'Sensitivity': r['mean_sens'],
-                    'Specificity': r['mean_spec'],
-                    'PPV':         r['mean_ppv'],
-                    'NPV':         r['mean_npv'],
-                    'F1':          r['mean_f1'],
-                    'Weighted_F1': r['mean_wf1'],
-                    'Kappa':       r['mean_kappa'],
-                })
+        with pd.ExcelWriter(os.path.join(rf,'model_evaluation.xlsx'),engine='openpyxl') as w:
+
+            # Per-fold tables
+            for mn,r in amr.items():
+                r['fold_df'].to_excel(w,sheet_name=mn[:31],index=False)
+
+            # Summary CV-OOF
+            sd=[]
+            for mn,r in amr.items():
+                row={'Model':mn,
+                     'DeLong_Retained':'Yes' if mn in delong_retained else 'No',
+                     'Final_Model':'Yes' if mn==best_mn else ''}
+                row.update({'AUC':r['mean_auc'],'Accuracy':r['mean_acc'],
+                    'Sensitivity':r['mean_sens'],'Specificity':r['mean_spec'],
+                    'PPV':r['mean_ppv'],'NPV':r['mean_npv'],
+                    'F1':r['mean_f1'],'Weighted_F1':r['mean_wf1'],'Kappa':r['mean_kappa']})
                 sd.append(row)
-            (pd.DataFrame(sd)
-             .sort_values('AUC', ascending=False)
-             .to_excel(w, sheet_name='Summary_InternalVal', index=False))
+            (pd.DataFrame(sd).sort_values('AUC',ascending=False)
+             .to_excel(w,sheet_name='Summary_InternalVal',index=False))
 
-            # 3. Train vs Internal Validation  [v3-3 all metrics]
-            comparison_rows = []
+            # Train vs Val comparison
+            comp=[]
             for mn in amr:
-                tr_m = train_results[mn]['metrics']
-                vm   = amr[mn]
-                row  = {
-                    'Model':  mn,
-                    'Train_AUC':         tr_m['Macro_AUC'],
-                    'Train_Accuracy':    tr_m['Accuracy'],
-                    'Train_Sensitivity': tr_m['Macro_Sensitivity'],
-                    'Train_Specificity': tr_m['Macro_Specificity'],
-                    'Train_PPV':         tr_m['Macro_PPV'],
-                    'Train_NPV':         tr_m['Macro_NPV'],
-                    'Train_F1':          tr_m['Macro_F1'],
-                    'Train_Kappa':       tr_m['Kappa'],
-                    'Val_AUC':           vm['mean_auc'],
-                    'Val_Accuracy':      vm['mean_acc'],
-                    'Val_Sensitivity':   vm['mean_sens'],
-                    'Val_Specificity':   vm['mean_spec'],
-                    'Val_PPV':           vm['mean_ppv'],
-                    'Val_NPV':           vm['mean_npv'],
-                    'Val_F1':            vm['mean_f1'],
-                    'Val_Kappa':         vm['mean_kappa'],
-                    'AUC_Gap':           tr_m['Macro_AUC'] - vm['mean_auc'],
-                    'Retained':          'Yes' if mn in retained else 'No',
-                    'Best':              'Best' if mn == best_mn else '',
-                }
-                comparison_rows.append(row)
-            (pd.DataFrame(comparison_rows)
-             .sort_values('Val_AUC', ascending=False)
-             .to_excel(w, sheet_name='Train_vs_InternalVal', index=False))
-
-            # 4. Bootstrap test
-            if len(bootstrap_df) > 0:
-                bootstrap_df.to_excel(w, sheet_name='Bootstrap_Test',
-                                      index=False)
-
-            # 5. [v3-4] Per-class detail for EVERY model (train + val)
+                tr_m=train_results[mn]['metrics']; vm=amr[mn]
+                comp.append({'Model':mn,
+                    'Train_AUC':tr_m['Macro_AUC'],'Train_Acc':tr_m['Accuracy'],
+                    'Train_Sens':tr_m['Macro_Sensitivity'],'Train_Spec':tr_m['Macro_Specificity'],
+                    'Train_PPV':tr_m['Macro_PPV'],'Train_NPV':tr_m['Macro_NPV'],
+                    'Train_F1':tr_m['Macro_F1'],'Train_Kappa':tr_m['Kappa'],
+                    'Val_AUC':vm['mean_auc'],'Val_Acc':vm['mean_acc'],
+                    'Val_Sens':vm['mean_sens'],'Val_Spec':vm['mean_spec'],
+                    'Val_PPV':vm['mean_ppv'],'Val_NPV':vm['mean_npv'],
+                    'Val_F1':vm['mean_f1'],'Val_Kappa':vm['mean_kappa'],
+                    'AUC_Gap':tr_m['Macro_AUC']-vm['mean_auc'],
+                    'DeLong_Retained':'Yes' if mn in delong_retained else 'No',
+                    'Final_Model':'Yes' if mn==best_mn else ''})
+            (pd.DataFrame(comp).sort_values('Val_AUC',ascending=False)
+             .to_excel(w,sheet_name='Train_vs_InternalVal',index=False))
+
+            # [v4-1] DeLong Step1
+            delong_step1_df.to_excel(w,sheet_name='DeLong_Step1_ModelSel',index=False)
+
+            # [v4-4] DeLong Step2 — full ablation table
+            if all_delong_step2_rows:
+                (pd.DataFrame(all_delong_step2_rows)
+                 .to_excel(w,sheet_name='DeLong_Step2_Ablation',index=False))
+
+            # [v4-5] Model selection summary
+            if not selection_df.empty:
+                selection_df.sort_values(['Opt_N','Full_AUC'],
+                    ascending=[True,False]).to_excel(
+                    w,sheet_name='ModelSelection_Summary',index=False)
+
+            # Per-class detail for every model
             for mn in mnames:
-                # Val (OOF)
-                oof_pc = per_class_df(amr[mn]['oof_metrics'], class_indices)
-                sheet_v = f'{mn}_Val_PerClass'
-                if len(sheet_v) > 31: sheet_v = sheet_v[:31]
-                oof_pc.to_excel(w, sheet_name=sheet_v, index=False)
-
-                # Train
-                tr_pc = per_class_df(train_results[mn]['metrics'], class_indices)
-                sheet_t = f'{mn}_Train_PerClass'
-                if len(sheet_t) > 31: sheet_t = sheet_t[:31]
-                tr_pc.to_excel(w, sheet_name=sheet_t, index=False)
-
-        # Ablation Excel
-        if ablation_data:
-            with pd.ExcelWriter(
-                os.path.join(rf, 'feature_ablation.xlsx'),
-                engine='openpyxl'
-            ) as w:
-                pd.DataFrame({
-                    'N': ablation_data['fcs'],
-                    'AUC': ablation_data['aucs']
-                }).to_excel(w, sheet_name='Ablation', index=False)
-                for mn, idf in shap_imp.items():
-                    idf.to_excel(w, sheet_name=f'{mn}_Imp', index=False)
-
-        # ── best_params.txt  [v3-6] all metrics ──────────────────────────
-        with open(os.path.join(rf, 'best_params.txt'), 'w',
+                s=f'{mn}_Val_PC'; s=s[:31]
+                per_class_df(amr[mn]['oof_metrics'],class_indices).to_excel(
+                    w,sheet_name=s,index=False)
+                s=f'{mn}_Train_PC'; s=s[:31]
+                per_class_df(train_results[mn]['metrics'],class_indices).to_excel(
+                    w,sheet_name=s,index=False)
+
+        # SHAP importance Excel
+        if shap_imp:
+            with pd.ExcelWriter(os.path.join(rf,'shap_importance.xlsx'),
+                                engine='openpyxl') as w:
+                for mn,idf in shap_imp.items():
+                    idf.to_excel(w,sheet_name=mn[:31],index=False)
+
+        # Ablation Excel: one sheet per model (AUC, p-value, Z, CI per feature count) plus a 'Summary' sheet
+        if ablation_results:
+            with pd.ExcelWriter(os.path.join(rf,'feature_ablation.xlsx'),
+                                engine='openpyxl') as w:
+                for mn,res in ablation_results.items():
+                    df_abl=pd.DataFrame({
+                        'N_Features':res['fcs'],'AUC':res['aucs'],
+                        'P_value':res['pvals'],'Z_score':res['zscores'],
+                        'CI_Low':res['ci_los'],'CI_High':res['ci_his'],
+                        'Significant':[('Yes' if p<ALPHA else 'No') for p in res['pvals']],
+                    })
+                    df_abl.to_excel(w,sheet_name=mn[:31],index=False)
+                pd.DataFrame([{
+                    'Model':mn,'Opt_N':ablation_results[mn]['opt_n'],
+                    'Opt_AUC':ablation_results[mn]['opt_auc'],
+                    'Full_AUC':ablation_results[mn]['full_auc'],
+                    'Opt_Features':' | '.join(ablation_results[mn]['opt_feats']),
+                    'Final_Model':'Yes' if mn==best_mn else '',
+                } for mn in ablation_results]).sort_values(
+                    ['Opt_N','Full_AUC'],ascending=[True,False]).to_excel(
+                    w,sheet_name='Summary',index=False)
+
+        # Text report
+        with open(os.path.join(rf,'model_selection_report.txt'),'w',
                   encoding='utf-8') as f:
-            f.write(f"Task: {task_type} Classification ({n_classes} classes)\n")
-            f.write(f"Classes: {classes}\n")
-            f.write(f"Label Mapping: {label_map}\n\n")
-            f.write(f"Statistical Test: Bootstrap AUC Test "
-                    f"(n=2000, alpha=0.05)\n")
-            f.write(f"Retained Models: {', '.join(retained)} "
-                    f"({len(retained)}/{nm})\n\n")
-            f.write("=" * 65 + "\n")
-            f.write("Model Performance Summary\n")
-            f.write(f"{'Metric':<14} "
-                    f"{'AUC':>7} {'Acc':>7} {'Sens':>7} {'Spec':>7} "
-                    f"{'PPV':>7} {'NPV':>7} {'F1':>7} {'Kappa':>7}\n")
-            f.write("-" * 65 + "\n")
-
-            def fmt_row(label, m_auc, m_acc, m_sens, m_spec,
-                        m_ppv, m_npv, m_f1, m_kappa):
-                return (f"{label:<14} "
-                        f"{m_auc:>7.4f} {m_acc:>7.4f} {m_sens:>7.4f} "
-                        f"{m_spec:>7.4f} {m_ppv:>7.4f} {m_npv:>7.4f} "
-                        f"{m_f1:>7.4f} {m_kappa:>7.4f}\n")
-
-            for mn in mcfg:
-                status = ("* Best" if mn == best_mn
-                          else ("Retained" if mn in retained else "Excluded"))
-                tr_m = train_results[mn]['metrics']
-                vm   = amr[mn]
-                f.write(f"\nModel: {mn}  |  {status}\n")
-                f.write(fmt_row(
-                    "  Train",
-                    tr_m['Macro_AUC'],    tr_m['Accuracy'],
-                    tr_m['Macro_Sensitivity'], tr_m['Macro_Specificity'],
-                    tr_m['Macro_PPV'],    tr_m['Macro_NPV'],
-                    tr_m['Macro_F1'],     tr_m['Kappa']))
-                f.write(fmt_row(
-                    "  CV-OOF",
-                    vm['mean_auc'],  vm['mean_acc'],
-                    vm['mean_sens'], vm['mean_spec'],
-                    vm['mean_ppv'],  vm['mean_npv'],
-                    vm['mean_f1'],   vm['mean_kappa']))
-                f.write(f"  AUC Gap: "
-                        f"{tr_m['Macro_AUC'] - vm['mean_auc']:+.4f}\n")
-                bp = bpd[mn]
-                if isinstance(bp, dict):
-                    for k, v in bp.items():
-                        f.write(f"  {k}: {v}\n")
-                else:
-                    f.write(f"  Params: {bp}\n")
-
-            if len(bootstrap_df) > 0:
-                f.write("\n" + "=" * 65 + "\n")
-                f.write("Bootstrap AUC Comparison Results\n")
-                f.write("=" * 65 + "\n")
-                for _, row in bootstrap_df.iterrows():
-                    f.write(f"  {row['Model_A']} vs {row['Model_B']}: "
-                            f"dAUC={row['AUC_Diff']:+.4f}  "
-                            f"95%CI=[{row['CI_95_Low']:+.4f},"
-                            f"{row['CI_95_High']:+.4f}]  "
-                            f"P={row['P_value']:.4f} -> {row['Decision']}\n")
-            if ablation_data:
-                f.write(f"\nOptimal Features ({ablation_data['opt_n']}): "
-                        f"{', '.join(ablation_data['opt_feats'])}\n")
-
-        # Save best model pickle
+            f.write("="*70+"\n")
+            f.write("ML Multi-Class Pipeline v4 — Model Selection Report\n")
+            f.write("="*70+"\n\n")
+            f.write(f"Task: {task_type} ({n_classes} classes: {classes})\n")
+            f.write(f"Training samples: {X.shape[0]}  Features: {X.shape[1]}\n\n")
+
+            f.write("─"*70+"\n")
+            f.write("STEP 1: DeLong Test — Model Screening\n")
+            f.write(f"Reference (best): {ref_mn}  OOF AUC={ref_auc:.4f}\n")
+            f.write(f"{'Model':<12}{'AUC':>8}{'ΔAUC':>9}{'CI_Low':>10}"
+                    f"{'CI_High':>10}{'Z':>8}{'P':>9}{'Decision':<12}\n")
+            f.write("─"*70+"\n")
+            for _,row in delong_step1_df.iterrows():
+                f.write(f"{row['Model']:<12}{row['AUC']:>8.4f}"
+                        f"{row['AUC_Diff']:>+9.4f}{row['CI_95_Low']:>+10.4f}"
+                        f"{row['CI_95_High']:>+10.4f}{row['Z_score']:>+8.3f}"
+                        f"{row['P_value']:>9.4f}  {row['Decision']}\n")
+            f.write(f"\nRetained: {', '.join(delong_retained)}\n\n")
+
+            f.write("─"*70+"\n")
+            f.write("STEP 2: Feature Ablation + DeLong Test (subset vs full)\n")
+            f.write(f"{'Model':<12}{'Full_AUC':>10}{'Opt_N':>7}"
+                    f"{'Opt_AUC':>10}{'p@Opt':>10}\n")
+            f.write("─"*70+"\n")
+            for mn in models_with_shap:
+                res=ablation_results[mn]
+                f.write(f"{mn:<12}{res['full_auc']:>10.4f}{res['opt_n']:>7d}"
+                        f"{res['opt_auc']:>10.4f}"
+                        f"{res['pvals'][res['opt_n']-1]:>10.4f}\n")
+
+            f.write("\n"+"─"*70+"\n")
+            f.write("STEP 3: Final Model Selection\n")
+            f.write("Rule: ① min(Opt_N); ② tie-break → max(Full_AUC)\n")
+            f.write(f"{'Model':<12}{'Opt_N':>7}{'Opt_AUC':>10}{'Full_AUC':>11}\n")
+            f.write("─"*70+"\n")
+            if not selection_df.empty:
+                for _,row in selection_df.sort_values(
+                        ['Opt_N','Full_AUC'],ascending=[True,False]).iterrows():
+                    marker='★ WINNER' if row['Model']==best_mn else ''
+                    f.write(f"{row['Model']:<12}{row['Opt_N']:>7d}"
+                            f"{row['Opt_AUC']:>10.4f}"
+                            f"{row['Full_AUC']:>11.4f}  {marker}\n")
+            f.write(f"\nFINAL MODEL: {best_mn}\n")
+            f.write(f"Optimal features ({len(final_feats)}): "
+                    f"{', '.join(final_feats)}\n")
+
+        # Save model
         pickle.dump({
-            'model_name':  best_mn,
-            'model':       tms[best_mn],
-            'best_params': bpd[best_mn],
-            'classes':     classes,
-            'n_classes':   n_classes,
-            'label_map':   label_map,
-            'features':    final_feats,
-            'task_type':   task_type,
-        }, open(os.path.join(rf, f'model_{best_mn}.pkl'), 'wb'))
+            'model_name':best_mn,'model':tms[best_mn],
+            'best_params':bpd[best_mn],'classes':classes,
+            'n_classes':n_classes,'label_map':label_map,
+            'features':final_feats,'task_type':task_type,
+        },open(os.path.join(rf,f'model_{best_mn}.pkl'),'wb'))
 
         # ── ZIP ───────────────────────────────────────────────────────────
         progress(0.97, desc="📦 打包ZIP...")
-        zp = os.path.join(tempfile.gettempdir(),
-                          f"ml_results_{int(time.time())}_{os.getpid()}.zip")
-        with zipfile.ZipFile(zp, 'w', zipfile.ZIP_DEFLATED) as zf:
-            for root, _, files in os.walk(rf):
+        zp=os.path.join(tempfile.gettempdir(),
+                        f"ml_results_{int(time.time())}_{os.getpid()}.zip")
+        with zipfile.ZipFile(zp,'w',zipfile.ZIP_DEFLATED) as zf:
+            for root,_,files in os.walk(rf):
                 for fn in files:
-                    zf.write(os.path.join(root, fn),
-                             os.path.relpath(os.path.join(root, fn), rf))
+                    zf.write(os.path.join(root,fn),
+                             os.path.relpath(os.path.join(root,fn),rf))
+        nf=sum(len(f) for _,_,f in os.walk(rf))
+        shutil.rmtree(rf,ignore_errors=True); gc.collect()
 
-        nf = sum(len(f) for _, _, f in os.walk(rf))
-        shutil.rmtree(rf, ignore_errors=True); gc.collect()
-
-        tm_b = train_results[best_mn]['metrics']
         log(f"\n{'━'*60}")
-        log(f"  🎉 分析完成！共 {nf} 个文件已打包")
-        log(f"  📋 Task: {task_type}  |  Best Model: {best_mn}")
-        log(f"  📊 Train  — AUC={tm_b['Macro_AUC']:.4f}  "
-            f"Acc={tm_b['Accuracy']:.4f}  "
-            f"Sens={tm_b['Macro_Sensitivity']:.4f}  "
-            f"Spec={tm_b['Macro_Specificity']:.4f}  "
-            f"PPV={tm_b['Macro_PPV']:.4f}  NPV={tm_b['Macro_NPV']:.4f}  "
-            f"F1={tm_b['Macro_F1']:.4f}")
-        log(f"  📊 CV-OOF — AUC={best_auc:.4f}  "
-            f"Acc={amr[best_mn]['mean_acc']:.4f}  "
-            f"Sens={amr[best_mn]['mean_sens']:.4f}  "
-            f"Spec={amr[best_mn]['mean_spec']:.4f}  "
-            f"PPV={amr[best_mn]['mean_ppv']:.4f}  "
-            f"NPV={amr[best_mn]['mean_npv']:.4f}  "
-            f"F1={amr[best_mn]['mean_f1']:.4f}")
+        log(f"  🎉 分析完成！{nf} 个文件已打包")
+        log(f"  🏆 最终模型: {best_mn}  最优特征数: {len(final_feats)}")
+        log(f"  📊 全特征OOF AUC: {amr[best_mn]['mean_auc']:.4f}")
+        log(f"  📊 消融后OOF AUC: {ablation_results[best_mn]['opt_auc']:.4f}"
+            if best_mn in ablation_results else "")
         log(f"{'━'*60}")
         progress(1.0, desc="✅ 完成!")
-        return zp, "\n".join(L)
+        return zp,"\n".join(L)
 
     except Exception as e:
-        log(f"\n❌ 错误: {e}")
-        log(traceback.format_exc())
-        if os.path.exists(rf): shutil.rmtree(rf, ignore_errors=True)
+        log(f"\n❌ 错误: {e}"); log(traceback.format_exc())
+        if os.path.exists(rf): shutil.rmtree(rf,ignore_errors=True)
         gc.collect()
-        return None, "\n".join(L)
+        return None,"\n".join(L)
 
 
 # ============================================================================
 # Gradio UI
 # ============================================================================
-CUSTOM_CSS = """
-.header-banner {
-    background: linear-gradient(135deg, #0a2463 0%, #1e3a7a 40%, #2554a8 100%);
-    border-radius: 16px; padding: 28px 36px; margin-bottom: 20px;
-    box-shadow: 0 8px 32px rgba(0,0,0,0.18); position: relative; overflow: hidden;
-}
-.header-banner::before {
-    content: ''; position: absolute; top: -50%; right: -20%;
-    width: 400px; height: 400px;
-    background: radial-gradient(circle, rgba(96,165,250,0.2) 0%, transparent 70%);
-    border-radius: 50%;
-}
-.header-banner img { max-height: 52px; border-radius: 6px; margin-bottom: 12px; }
-.header-banner h1 { color: #e2e8f0 !important; font-size: 1.7em !important;
-    margin: 4px 0 6px 0 !important; font-weight: 700 !important; }
-.header-banner p { color: #94a3b8 !important; font-size: 0.92em !important;
-    margin: 2px 0 !important; line-height: 1.6; }
-.header-banner .credit { color: #64748b !important; font-size: 0.82em !important;
-    margin-top: 10px !important;
-    border-top: 1px solid rgba(148,163,184,0.15); padding-top: 10px; }
-.section-title {
-    background: linear-gradient(90deg, #2563eb 0%, #3b82f6 100%);
-    color: white !important; padding: 8px 16px; border-radius: 8px;
-    font-size: 0.95em !important; font-weight: 600 !important;
-    margin: 12px 0 8px 0; }
-.pipeline-box {
-    background: linear-gradient(135deg, #f0f9ff 0%, #e0f2fe 100%);
-    border: 1px solid #bae6fd; border-radius: 12px;
-    padding: 14px 18px; margin: 8px 0; font-size: 0.88em; }
-.pipeline-box code { background: #2563eb; color: white; padding: 2px 8px;
-    border-radius: 4px; font-size: 0.85em; margin: 0 2px; }
-.log-area textarea {
-    font-family: 'Menlo','Consolas',monospace !important;
-    font-size: 12.5px !important; line-height: 1.5 !important;
-    background: #0f172a !important; color: #e2e8f0 !important;
-    border-radius: 10px !important; padding: 16px !important; }
-.gradio-container { max-width: 1280px !important; }
-footer { display: none !important; }
+CUSTOM_CSS="""
+.header-banner{background:linear-gradient(135deg,#0a2463 0%,#1e3a7a 40%,#2554a8 100%);
+border-radius:16px;padding:28px 36px;margin-bottom:20px;
+box-shadow:0 8px 32px rgba(0,0,0,0.18);position:relative;overflow:hidden;}
+.header-banner::before{content:'';position:absolute;top:-50%;right:-20%;
+width:400px;height:400px;
+background:radial-gradient(circle,rgba(96,165,250,0.2) 0%,transparent 70%);border-radius:50%;}
+.header-banner img{max-height:52px;border-radius:6px;margin-bottom:12px;}
+.header-banner h1{color:#e2e8f0!important;font-size:1.7em!important;
+margin:4px 0 6px 0!important;font-weight:700!important;}
+.header-banner p{color:#94a3b8!important;font-size:0.92em!important;
+margin:2px 0!important;line-height:1.6;}
+.header-banner .credit{color:#64748b!important;font-size:0.82em!important;
+margin-top:10px!important;border-top:1px solid rgba(148,163,184,0.15);padding-top:10px;}
+.section-title{background:linear-gradient(90deg,#2563eb 0%,#3b82f6 100%);
+color:white!important;padding:8px 16px;border-radius:8px;
+font-size:0.95em!important;font-weight:600!important;margin:12px 0 8px 0;}
+.pipeline-box{background:linear-gradient(135deg,#f0f9ff 0%,#e0f2fe 100%);
+border:1px solid #bae6fd;border-radius:12px;padding:14px 18px;margin:8px 0;font-size:0.88em;}
+.pipeline-box code{background:#2563eb;color:white;padding:2px 8px;
+border-radius:4px;font-size:0.85em;margin:0 2px;}
+.log-area textarea{font-family:'Menlo','Consolas',monospace!important;
+font-size:12.5px!important;line-height:1.5!important;
+background:#0f172a!important;color:#e2e8f0!important;
+border-radius:10px!important;padding:16px!important;}
+.gradio-container{max-width:1280px!important;}
+footer{display:none!important;}
 """
 
 with gr.Blocks(
-    title="ML 多分类模型平台 — 复旦大学附属眼耳鼻喉科医院",
-    theme=gr.themes.Soft(primary_hue="blue", secondary_hue="slate",
-                         neutral_hue="slate"),
+    title="ML 多分类模型平台 v4 — 复旦大学附属眼耳鼻喉科医院",
+    theme=gr.themes.Soft(primary_hue="blue",secondary_hue="slate",neutral_hue="slate"),
     css=CUSTOM_CSS,
 ) as demo:
 
@@ -1358,101 +1212,86 @@ with gr.Blocks(
     <div class="header-banner">
         <img src="https://huggingface.co/spaces/fudan-renjun/machine-learning-2/resolve/main/hospital_logo.png"
              alt="Logo" onerror="this.style.display='none'"/>
-        <h1>🧬 ML 多分类模型训练与评估平台</h1>
-        <p>支持 2~8 分类 · 上传 CSV 即可完成全流程分析</p>
-        <p>评估指标：AUC · Accuracy · Sensitivity · Specificity · PPV · NPV · F1 · Kappa</p>
+        <h1>🧬 ML 多分类模型训练与评估平台 v4</h1>
+        <p>支持 2~8 分类 · DeLong检验模型筛选 · 逐模型特征消融 · 自动最优模型决策</p>
+        <p>指标：AUC · Accuracy · Sensitivity · Specificity · PPV · NPV · F1 · Kappa</p>
         <p class="credit">复旦大学附属眼耳鼻喉科医院 · 检验科 · 任俊</p>
     </div>
     """)
 
     gr.HTML("""
     <div class="pipeline-box">
-        <strong>📋 流程：</strong>
-        <code>训练+训练集评估</code> → <code>交叉验证(OOF)</code> →
-        <code>Train vs Val对比</code> → <code>SHAP</code> →
-        <code>特征消融</code> → <code>外部验证</code>
-        &nbsp;|&nbsp;
-        <strong>指标：</strong>
-        AUC · Accuracy · Sensitivity · Specificity · PPV · NPV · F1 · Kappa（宏平均+逐类）
+        <strong>📋 三步流程：</strong>
+        <code>Step1 DeLong筛模型</code> → <code>Step2 逐模型消融+DeLong vs全特征</code> →
+        <code>Step3 特征最少优先选最终模型</code>
         &nbsp;|&nbsp;
         <strong>CSV：</strong> 第1列=标签(整数), 第2列=ID, 第3列起=特征
+        &nbsp;|&nbsp;
+        <strong>输出：</strong> ROC/PR/CM(训练+验证) · SHAP · 消融曲线 · DeLong检验表 · 选模报告
     </div>
     """)
 
     with gr.Row(equal_height=False):
         with gr.Column(scale=5):
             gr.HTML('<div class="section-title">📂 数据上传</div>')
-            train_file = gr.File(label="训练集 CSV（必需）", file_types=[".csv"])
+            train_file=gr.File(label="训练集 CSV（必需）",file_types=[".csv"])
             gr.HTML('<p style="color:#64748b;font-size:0.85em;margin:4px 0 8px 0;">'
                     '验证集可选，支持同时上传 1~3 个</p>')
             with gr.Row():
-                val_file1 = gr.File(label="验证集 1（可选）",
-                                    file_types=[".csv"], scale=1)
-                val_file2 = gr.File(label="验证集 2（可选）",
-                                    file_types=[".csv"], scale=1)
-                val_file3 = gr.File(label="验证集 3（可选）",
-                                    file_types=[".csv"], scale=1)
+                val_file1=gr.File(label="验证集 1（可选）",file_types=[".csv"],scale=1)
+                val_file2=gr.File(label="验证集 2（可选）",file_types=[".csv"],scale=1)
+                val_file3=gr.File(label="验证集 3（可选）",file_types=[".csv"],scale=1)
 
             gr.HTML('<div class="section-title">🏷️ 分类设置</div>')
-            n_classes_select = gr.Dropdown(
-                choices=["2 类（二分类）","3 类","4 类","5 类",
-                         "6 类","7 类","8 类"],
-                value="2 类（二分类）", label="选择分类数",
-                info="请根据数据标签列的类别数选择，系统将自动验证是否匹配",
-            )
+            n_classes_select=gr.Dropdown(
+                choices=["2 类（二分类）","3 类","4 类","5 类","6 类","7 类","8 类"],
+                value="2 类（二分类）",label="选择分类数",
+                info="请根据数据标签列的类别数选择，系统将自动验证是否匹配")
 
             gr.HTML('<div class="section-title">🤖 模型选择</div>')
-            model_selector = gr.Dropdown(
-                choices=ALL_MODEL_NAMES, value=ALL_MODEL_NAMES,
-                multiselect=True, label="选择模型（均支持多分类）",
-                info=("RF=随机森林  DT=决策树  KNN=K近邻  XGB=XGBoost  "
-                      "AdaBoost  LR=逻辑回归  NB=朴素贝叶斯  SVM=支持向量机"),
-            )
+            model_selector=gr.Dropdown(
+                choices=ALL_MODEL_NAMES,value=ALL_MODEL_NAMES,multiselect=True,
+                label="选择模型（均支持多分类）",
+                info="RF=随机森林  DT=决策树  KNN=K近邻  XGB=XGBoost  "
+                     "AdaBoost  LR=逻辑回归  NB=朴素贝叶斯  SVM=支持向量机")
             with gr.Row():
-                btn_all    = gr.Button("🔘 全选",      size="sm", variant="secondary")
-                btn_tree   = gr.Button("🌲 树模型",    size="sm", variant="secondary")
-                btn_linear = gr.Button("📐 线性模型",  size="sm", variant="secondary")
-                btn_top4   = gr.Button("⚡ 经典四模型", size="sm", variant="secondary")
-            btn_all.click(lambda: ALL_MODEL_NAMES,                outputs=model_selector)
-            btn_tree.click(lambda: ['RF','DT','XGB','AdaBoost'],  outputs=model_selector)
-            btn_linear.click(lambda: ['LR','SVM','NB'],           outputs=model_selector)
-            btn_top4.click(lambda: ['RF','XGB','LR','SVM'],       outputs=model_selector)
+                btn_all   =gr.Button("🔘 全选",      size="sm",variant="secondary")
+                btn_tree  =gr.Button("🌲 树模型",    size="sm",variant="secondary")
+                btn_linear=gr.Button("📐 线性模型",  size="sm",variant="secondary")
+                btn_top4  =gr.Button("⚡ 经典四模型",size="sm",variant="secondary")
+            btn_all.click(lambda:ALL_MODEL_NAMES,              outputs=model_selector)
+            btn_tree.click(lambda:['RF','DT','XGB','AdaBoost'],outputs=model_selector)
+            btn_linear.click(lambda:['LR','SVM','NB'],         outputs=model_selector)
+            btn_top4.click(lambda:['RF','XGB','LR','SVM'],     outputs=model_selector)
 
             gr.HTML('<div class="section-title">⚙️ 参数配置</div>')
-            enable_tuning = gr.Checkbox(
-                value=False,
+            enable_tuning=gr.Checkbox(value=False,
                 label="启用超参数调优 (GridSearchCV)  ⚠️ 开启后运行时间显著增加")
             with gr.Row():
-                cv_folds = gr.Slider(3, 10, value=5, step=1,
-                                     label="交叉验证折数")
-                top_n    = gr.Slider(5, 50, value=20, step=1,
-                                     label="SHAP 前 N 个特征")
-            shap_sz = gr.Slider(30, 200, value=80, step=10,
-                                label="SHAP 采样数量")
-
-            run_btn = gr.Button("🚀 开始分析", variant="primary", size="lg")
+                cv_folds=gr.Slider(3,10,value=5,step=1,label="交叉验证折数")
+                top_n   =gr.Slider(5,50,value=20,step=1,label="SHAP 前 N 个特征（消融上限）")
+            shap_sz=gr.Slider(30,200,value=80,step=10,label="SHAP 采样数量")
+            run_btn=gr.Button("🚀 开始分析",variant="primary",size="lg")
 
         with gr.Column(scale=5):
             gr.HTML('<div class="section-title">📋 运行日志</div>')
-            log_output = gr.Textbox(
-                label="", lines=24, max_lines=50, interactive=False,
-                placeholder=("点击「开始分析」后，日志将在此显示...\n"
-                             "支持 2~8 分类。\n"
-                             "评估指标：AUC / Accuracy / Sensitivity / "
-                             "Specificity / PPV / NPV / F1 / Kappa"),
-                elem_classes="log-area",
-            )
+            log_output=gr.Textbox(
+                label="",lines=28,max_lines=60,interactive=False,
+                placeholder=("点击「开始分析」后日志将在此显示...\n\n"
+                             "Step 1: DeLong检验 — 筛选与最佳模型无显著差异的模型\n"
+                             "Step 2: 各保留模型SHAP+特征消融+DeLong(子集 vs 全特征)\n"
+                             "Step 3: 特征数最少者为最终模型；相同则取AUC最高者"),
+                elem_classes="log-area")
             gr.HTML('<div class="section-title">⬇️ 结果下载</div>')
-            zip_output = gr.File(label="分析结果 ZIP 压缩包")
+            zip_output=gr.File(label="分析结果 ZIP 压缩包")
 
     run_btn.click(
         fn=run_pipeline,
-        inputs=[train_file, val_file1, val_file2, val_file3,
-                n_classes_select, model_selector, enable_tuning,
-                cv_folds, top_n, shap_sz],
-        outputs=[zip_output, log_output],
-        api_name="run",
-    )
+        inputs=[train_file,val_file1,val_file2,val_file3,
+                n_classes_select,model_selector,enable_tuning,
+                cv_folds,top_n,shap_sz],
+        outputs=[zip_output,log_output],
+        api_name="run")
 
 
 # ============================================================================
@@ -1460,28 +1299,35 @@ with gr.Blocks(
 # ============================================================================
 from datetime import datetime
 
-ACCOUNTS = {
-    "admin":  {"password": "admin123",  "expires": None},
-    "renjun": {"password": "fudan2025", "expires": "2027-12-31"},
-    "guest":  {"password": "guest888",  "expires": "2027-06-30"},
+# Login accounts checked by auth_fn: username -> password and optional expiry
+# date ("YYYY-MM-DD", or None for no expiry).
+# NOTE(review): passwords are committed here in plaintext; consider loading
+# them from environment variables or deployment secrets instead.
+ACCOUNTS={
+    "admin":  {"password":"admin123",  "expires":None},
+    "renjun": {"password":"fudan2025", "expires":"2027-12-31"},
+    "guest":  {"password":"guest888",  "expires":"2027-06-30"},
 }
 
-def auth_fn(username, password):
-    user = ACCOUNTS.get(username)
-    if not user or user["password"] != password: return False
+def auth_fn(username,password):
+    """Return True if the credentials match a non-expired ACCOUNTS entry.
+
+    Unknown usernames and wrong passwords are rejected. When the account has
+    an "expires" date, logins are refused once the current local time is past
+    00:00 of that date; an expiry value that cannot be parsed also refuses
+    the login.
+    """
+    user=ACCOUNTS.get(username)
+    if not user or user["password"]!=password: return False
     if user["expires"]:
         try:
-            if datetime.now() > datetime.strptime(user["expires"], "%Y-%m-%d"):
-                return False
-        except: return False
+            if datetime.now()>datetime.strptime(user["expires"],"%Y-%m-%d"): return False
+        # Only malformed expiry data should fail closed here; a bare except
+        # would also swallow KeyboardInterrupt/SystemExit.
+        except (ValueError,TypeError): return False
     return True
 
 demo.queue()
-demo.launch(
-    server_name="0.0.0.0",
-    server_port=7860,
-    auth=auth_fn,
-    auth_message=("🔐 复旦大学附属眼耳鼻喉科医院 · ML多分类分析平台\n"
-                  "请输入账号和密码登录"),
-    ssr_mode=False,
-)
\ No newline at end of file
+demo.launch(server_name="0.0.0.0",server_port=7860,auth=auth_fn,
+    auth_message="🔐 复旦大学附属眼耳鼻喉科医院 · ML多分类分析平台 v4\n请输入账号和密码登录",
+    ssr_mode=False)
\ No newline at end of file