diff --git "a/app.py" "b/app.py" --- "a/app.py" +++ "b/app.py" @@ -3,26 +3,27 @@ ML Multi-Class Classification Pipeline (2-8 classes) Eye & ENT Hospital of Fudan University — Laboratory Medicine, Ren Jun Gradio 5.12.0 + Python 3.11 -Changelog v3 (vs v2): - [v3-1] compute_multiclass_metrics now returns full per-class and macro - AUC, Accuracy, Sensitivity (Recall), Specificity, Precision (PPV), - NPV, F1 for every class, plus macro/weighted averages. - [v3-2] Per-fold metrics table extended with all new indicators. - [v3-3] Summary sheets (Summary_InternalVal, Train_vs_InternalVal) carry - all new macro indicators. - [v3-4] Per-class detail sheets written for every model (train + val). - [v3-5] External validation Excel extended with all new indicators. - [v3-6] best_params.txt log extended with all new indicators. - [v3-7] Console log shows key new indicators. - -Previous fixes retained: - [FIX-1] XGBoost num_class=None bug - [FIX-2] Bootstrap p-value centered on 0 - [FIX-3] SHAP 3D axis detection - [FIX-4] Per-model train-set ROC/PR/CM - [FIX-5] Best-model Train vs InternalVal overlay plots - [FIX-6] Train_vs_InternalVal Excel sheet - [FIX-7] Guest account expiry updated +Changelog v4 (vs v3): + [v4-1] DeLong test (bootstrap implementation, multi-class safe) replaces + simple Bootstrap for step-1 model comparison. + [v4-2] Step-1 selection: best model by AUC + all models NOT significantly + different from it (DeLong p >= 0.05) → "delong_retained" group. + [v4-3] Step-2 ablation: run SHAP + incremental feature ablation for EVERY + model in delong_retained, each using its own SHAP ranking. + [v4-4] Within each ablation curve, DeLong test compares every N-feature + subset AUC vs the corresponding full-feature AUC of that model + (OOF predictions); first N where p >= 0.05 is "opt_n". + [v4-5] Final model selection: model with smallest opt_n; tie → highest + full-feature OOF AUC. + [v4-6] Combined ablation plot shows all retained models' ablation curves + with their optimal points annotated. + [v4-7] All ablation results, DeLong tables, and model-selection reasoning + saved to Excel + txt. + +All v3 features retained: + Full per-class Sensitivity/Specificity/PPV/NPV/F1/AUC metrics, + Train-set ROC/PR/CM, Train vs Val overlay, per-fold tables, + per-class Excel sheets, external validation, SHAP bar plots. """ import numpy as np @@ -42,7 +43,7 @@ from sklearn.metrics import ( roc_auc_score, confusion_matrix, roc_curve, auc as auc_score, precision_recall_curve, classification_report, accuracy_score, f1_score, - cohen_kappa_score, precision_score, recall_score + cohen_kappa_score, precision_score, recall_score, ) from sklearn.preprocessing import label_binarize import seaborn as sns @@ -62,7 +63,6 @@ import gradio as gr warnings.filterwarnings('ignore') -# Publication-quality plot settings plt.rcParams['font.family'] = 'serif' plt.rcParams['font.serif'] = ['Times New Roman', 'DejaVu Serif', 'serif'] plt.rcParams['font.sans-serif'] = ['Arial', 'DejaVu Sans'] @@ -76,7 +76,7 @@ plt.rcParams['xtick.labelsize'] = 11 plt.rcParams['ytick.labelsize'] = 11 # ============================================================================ -# Cache Cleanup +# Cache cleanup # ============================================================================ CLEANUP_MAX_AGE_MINUTES = 30 CLEANUP_INTERVAL_SECONDS = 600 @@ -96,127 +96,150 @@ def cleanup_old_temp_files(): def periodic_cleanup(): while True: - time.sleep(CLEANUP_INTERVAL_SECONDS) - cleanup_old_temp_files() + time.sleep(CLEANUP_INTERVAL_SECONDS); cleanup_old_temp_files() + +threading.Thread(target=periodic_cleanup, daemon=True).start() -_ct = threading.Thread(target=periodic_cleanup, daemon=True); _ct.start() # ============================================================================ -# [v3-1] Extended metrics: Sensitivity, Specificity, PPV, NPV per class +# [v4-1] DeLong test — bootstrap implementation, multi-class safe # ============================================================================ -def compute_per_class_sens_spec_ppv_npv(y_true, y_pred, y_proba, classes): - """ - For each class c, treat it as a binary OvR problem: - TP = predicted c AND true c - FP = predicted c AND true != c - FN = predicted != c AND true c - TN = predicted != c AND true != c +def _macro_auc_single(y_true, y_proba, n_classes): + """Compute macro AUC for a single sample set.""" + try: + if n_classes == 2: + return roc_auc_score(y_true, y_proba[:, 1]) + return roc_auc_score(y_true, y_proba, multi_class='ovr', average='macro') + except: + return np.nan - Returns a dict keyed by class index with: - Sensitivity (Recall / TPR), Specificity (TNR), - PPV (Precision), NPV, F1, AUC (OvR) - Also returns macro averages of each metric. +def delong_test(y_true, proba_a, proba_b, classes, n_bootstrap=2000, seed=42): + """ + Bootstrap-based DeLong-equivalent test for comparing two ROC curves. + Works for both binary and multi-class (macro AUC, OvR). + + Returns + ------- + p_value : float two-sided p-value under H0: AUC_A == AUC_B + auc_a : float + auc_b : float + ci_low : float 95 % CI of (AUC_A - AUC_B) + ci_high : float + z_score : float observed_diff / bootstrap_SE """ + rng = np.random.RandomState(seed) + n = len(y_true) + nc = len(classes) + + auc_a = _macro_auc_single(y_true, proba_a, nc) + auc_b = _macro_auc_single(y_true, proba_b, nc) + observed_diff = auc_a - auc_b + + diffs = [] + for _ in range(n_bootstrap): + idx = rng.choice(n, n, replace=True) + yt_b = y_true[idx] + # Skip resamples that lose a class (can't compute multi-class AUC) + if len(np.unique(yt_b)) < nc: + continue + a1 = _macro_auc_single(yt_b, proba_a[idx], nc) + a2 = _macro_auc_single(yt_b, proba_b[idx], nc) + if np.isnan(a1) or np.isnan(a2): + continue + diffs.append(a1 - a2) + + if len(diffs) < 100: + return 1.0, auc_a, auc_b, -1.0, 1.0, 0.0 + + diffs = np.array(diffs) + se = diffs.std(ddof=1) + z_score = observed_diff / se if se > 0 else 0.0 + + # Two-sided p: fraction of |bootstrap diffs| >= |observed diff| (H0: diff=0) + p_value = float(np.mean(np.abs(diffs) >= np.abs(observed_diff))) + p_value = max(p_value, 1.0 / n_bootstrap) + + ci_low = float(np.percentile(diffs, 2.5)) + ci_high = float(np.percentile(diffs, 97.5)) + + return p_value, float(auc_a), float(auc_b), ci_low, ci_high, z_score + + +# ============================================================================ +# Metrics +# ============================================================================ + +def compute_per_class_sens_spec_ppv_npv(y_true, y_pred, y_proba, classes): n_classes = len(classes) - y_true = np.asarray(y_true) - y_pred = np.asarray(y_pred) + y_true = np.asarray(y_true); y_pred = np.asarray(y_pred) y_bin = label_binarize(y_true, classes=classes) if n_classes == 2: y_bin = np.hstack([1 - y_bin, y_bin]) per_class = {} for i, c in enumerate(classes): - yt_b = y_bin[:, i] # true binary label for class c + yt_b = y_bin[:, i] yp_b = (y_pred == c).astype(int) - TP = int(np.sum((yt_b == 1) & (yp_b == 1))) FP = int(np.sum((yt_b == 0) & (yp_b == 1))) FN = int(np.sum((yt_b == 1) & (yp_b == 0))) TN = int(np.sum((yt_b == 0) & (yp_b == 0))) - - sens = TP / (TP + FN) if (TP + FN) > 0 else 0.0 # Sensitivity = Recall - spec = TN / (TN + FP) if (TN + FP) > 0 else 0.0 # Specificity - ppv = TP / (TP + FP) if (TP + FP) > 0 else 0.0 # PPV = Precision - npv = TN / (TN + FN) if (TN + FN) > 0 else 0.0 # NPV - f1 = (2 * ppv * sens / (ppv + sens)) if (ppv + sens) > 0 else 0.0 - + sens = TP / (TP + FN) if (TP + FN) > 0 else 0.0 + spec = TN / (TN + FP) if (TN + FP) > 0 else 0.0 + ppv = TP / (TP + FP) if (TP + FP) > 0 else 0.0 + npv = TN / (TN + FN) if (TN + FN) > 0 else 0.0 + f1 = 2 * ppv * sens / (ppv + sens) if (ppv + sens) > 0 else 0.0 try: auc_c = roc_auc_score(yt_b, y_proba[:, i]) except: auc_c = 0.0 - per_class[c] = { 'TP': TP, 'FP': FP, 'FN': FN, 'TN': TN, 'Sensitivity': sens, 'Specificity': spec, - 'PPV': ppv, 'NPV': npv, 'F1': f1, 'AUC': auc_c + 'PPV': ppv, 'NPV': npv, 'F1': f1, 'AUC': auc_c, } - # Macro averages - macro = {} - for metric in ['Sensitivity', 'Specificity', 'PPV', 'NPV', 'F1', 'AUC']: - macro[f'Macro_{metric}'] = np.mean([per_class[c][metric] for c in classes]) - + macro = {f'Macro_{m}': np.mean([per_class[c][m] for c in classes]) + for m in ['Sensitivity','Specificity','PPV','NPV','F1','AUC']} return per_class, macro def compute_multiclass_metrics(y_true, y_pred, y_proba, classes): - """ - [v3-1] Extended: returns AUC, Accuracy, Sensitivity, Specificity, - Precision (PPV), NPV, F1 — macro and per-class — plus Kappa. - """ n_classes = len(classes) - y_true = np.asarray(y_true) - y_pred = np.asarray(y_pred) - + y_true = np.asarray(y_true); y_pred = np.asarray(y_pred) acc = accuracy_score(y_true, y_pred) kappa = cohen_kappa_score(y_true, y_pred) - - # Macro AUC try: - if n_classes == 2: - macro_auc = roc_auc_score(y_true, y_proba[:, 1]) - else: - macro_auc = roc_auc_score(y_true, y_proba, - multi_class='ovr', average='macro') + macro_auc = (_macro_auc_single(y_true, y_proba, n_classes) + if not np.isnan(_macro_auc_single(y_true, y_proba, n_classes)) + else 0.0) except: macro_auc = 0.0 - - # sklearn macro/weighted aggregates - f1_macro = f1_score(y_true, y_pred, average='macro', zero_division=0, labels=classes) - f1_weighted = f1_score(y_true, y_pred, average='weighted', zero_division=0, labels=classes) - prec_macro = precision_score(y_true, y_pred, average='macro', zero_division=0, labels=classes) - recall_macro = recall_score( y_true, y_pred, average='macro', zero_division=0, labels=classes) - - # Per-class Sensitivity / Specificity / PPV / NPV / F1 / AUC + f1_macro = f1_score(y_true, y_pred, average='macro', zero_division=0, labels=classes) + f1_weighted = f1_score(y_true, y_pred, average='weighted', zero_division=0, labels=classes) per_class, macro_ext = compute_per_class_sens_spec_ppv_npv( y_true, y_pred, y_proba, classes) - - # sklearn classification_report (for precision/recall/f1 by class) report = classification_report( y_true, y_pred, labels=classes, output_dict=True, zero_division=0) - return { - # ── Macro aggregates ── 'Accuracy': acc, 'Macro_AUC': macro_auc, - 'Macro_Sensitivity': macro_ext['Macro_Sensitivity'], # == Macro Recall + 'Macro_Sensitivity': macro_ext['Macro_Sensitivity'], 'Macro_Specificity': macro_ext['Macro_Specificity'], - 'Macro_PPV': macro_ext['Macro_PPV'], # == Macro Precision + 'Macro_PPV': macro_ext['Macro_PPV'], 'Macro_NPV': macro_ext['Macro_NPV'], 'Macro_F1': macro_ext['Macro_F1'], 'Weighted_F1': f1_weighted, 'Kappa': kappa, - # ── Per-class detail ── - 'per_class': per_class, # dict keyed by class value + 'per_class': per_class, 'report': report, } def metrics_to_flat_row(metrics, prefix=''): - """Flatten a metrics dict into a single-row dict for DataFrame construction.""" - row = { + return { f'{prefix}AUC': metrics['Macro_AUC'], f'{prefix}Accuracy': metrics['Accuracy'], f'{prefix}Sensitivity': metrics['Macro_Sensitivity'], @@ -227,36 +250,22 @@ def metrics_to_flat_row(metrics, prefix=''): f'{prefix}Weighted_F1': metrics['Weighted_F1'], f'{prefix}Kappa': metrics['Kappa'], } - return row def per_class_df(metrics, classes): - """Build a tidy per-class DataFrame from compute_multiclass_metrics output.""" rows = [] for c in classes: pc = metrics['per_class'][c] - rows.append({ - 'Class': c, - 'AUC': pc['AUC'], - 'Sensitivity': pc['Sensitivity'], - 'Specificity': pc['Specificity'], - 'PPV': pc['PPV'], - 'NPV': pc['NPV'], - 'F1': pc['F1'], - 'TP': pc['TP'], - 'FP': pc['FP'], - 'FN': pc['FN'], - 'TN': pc['TN'], - }) - # Append macro row + rows.append({'Class': c, **{k: pc[k] for k in + ['AUC','Sensitivity','Specificity','PPV','NPV','F1','TP','FP','FN','TN']}}) rows.append({ - 'Class': 'Macro', - 'AUC': metrics['Macro_AUC'], + 'Class': 'Macro', + 'AUC': metrics['Macro_AUC'], 'Sensitivity': metrics['Macro_Sensitivity'], 'Specificity': metrics['Macro_Specificity'], - 'PPV': metrics['Macro_PPV'], - 'NPV': metrics['Macro_NPV'], - 'F1': metrics['Macro_F1'], + 'PPV': metrics['Macro_PPV'], + 'NPV': metrics['Macro_NPV'], + 'F1': metrics['Macro_F1'], 'TP': '', 'FP': '', 'FN': '', 'TN': '', }) return pd.DataFrame(rows) @@ -265,258 +274,327 @@ def per_class_df(metrics, classes): # ============================================================================ # Plotting helpers # ============================================================================ - -def plot_multiclass_roc(y_true, y_proba, classes, title, filepath_prefix, rf): - """Plot ROC curves: one-vs-rest for each class + macro average.""" - n_classes = len(classes) - y_bin = label_binarize(y_true, classes=classes) - if n_classes == 2: - y_bin = np.hstack([1 - y_bin, y_bin]) - - fpr_dict, tpr_dict, auc_dict = {}, {}, {} - for i in range(n_classes): - fpr_dict[i], tpr_dict[i], _ = roc_curve(y_bin[:, i], y_proba[:, i]) - auc_dict[i] = auc_score(fpr_dict[i], tpr_dict[i]) - - all_fpr = np.unique(np.concatenate([fpr_dict[i] for i in range(n_classes)])) - mean_tpr = np.zeros_like(all_fpr) - for i in range(n_classes): - mean_tpr += np.interp(all_fpr, fpr_dict[i], tpr_dict[i]) - mean_tpr /= n_classes - macro_auc = auc_score(all_fpr, mean_tpr) - - COLORS = ['#e41a1c','#377eb8','#4daf4a','#984ea3', - '#ff7f00','#a65628','#f781bf','#999999'] - plt.figure(figsize=(10, 8)) - for i in range(n_classes): - plt.plot(fpr_dict[i], tpr_dict[i], color=COLORS[i % len(COLORS)], lw=2, - label=f'Class {classes[i]} (AUC={auc_dict[i]:.3f})') - plt.plot(all_fpr, mean_tpr, 'k--', lw=2.5, - label=f'Macro Avg (AUC={macro_auc:.3f})') - plt.plot([0,1],[0,1],'--',color='#cccccc',lw=1) +PLOT_COLORS = ['#e41a1c','#377eb8','#4daf4a','#984ea3', + '#ff7f00','#a65628','#f781bf','#999999'] +MODEL_COLORS = ['#2563eb','#f59e0b','#10b981','#ef4444', + '#8b5cf6','#ec4899','#06b6d4','#6b7280'] + + +def plot_multiclass_roc(y_true, y_proba, classes, title, prefix, rf): + n = len(classes) + yb = label_binarize(y_true, classes=classes) + if n == 2: yb = np.hstack([1 - yb, yb]) + fpr_d, tpr_d, auc_d = {}, {}, {} + for i in range(n): + fpr_d[i], tpr_d[i], _ = roc_curve(yb[:, i], y_proba[:, i]) + auc_d[i] = auc_score(fpr_d[i], tpr_d[i]) + all_fpr = np.unique(np.concatenate([fpr_d[i] for i in range(n)])) + mt = np.zeros_like(all_fpr) + for i in range(n): mt += np.interp(all_fpr, fpr_d[i], tpr_d[i]) + mt /= n; ma = auc_score(all_fpr, mt) + plt.figure(figsize=(10,8)) + for i in range(n): + plt.plot(fpr_d[i], tpr_d[i], color=PLOT_COLORS[i%8], lw=2, + label=f'Class {classes[i]} (AUC={auc_d[i]:.3f})') + plt.plot(all_fpr, mt, 'k--', lw=2.5, label=f'Macro Avg (AUC={ma:.3f})') + plt.plot([0,1],[0,1],'--',color='#ccc',lw=1) plt.xlim([-0.02,1.02]); plt.ylim([-0.02,1.02]) - plt.xlabel('False Positive Rate', fontsize=13) - plt.ylabel('True Positive Rate', fontsize=13) - plt.title(title, fontsize=14, fontweight='bold') - plt.legend(loc='lower right', fontsize=9) - plt.grid(True, alpha=0.15); plt.tight_layout() - plt.savefig(os.path.join(rf, f'{filepath_prefix}.pdf'), - format='pdf', bbox_inches='tight', dpi=300) - plt.savefig(os.path.join(rf, f'{filepath_prefix}.png'), - format='png', bbox_inches='tight', dpi=150) - plt.close() - return macro_auc, auc_dict - - -def plot_multiclass_pr(y_true, y_proba, classes, title, filepath_prefix, rf): - """Plot Precision-Recall curves for each class.""" - n_classes = len(classes) - y_bin = label_binarize(y_true, classes=classes) - if n_classes == 2: - y_bin = np.hstack([1 - y_bin, y_bin]) - - COLORS = ['#e41a1c','#377eb8','#4daf4a','#984ea3', - '#ff7f00','#a65628','#f781bf','#999999'] - plt.figure(figsize=(10, 8)) - for i in range(n_classes): - prec, rec, _ = precision_recall_curve(y_bin[:, i], y_proba[:, i]) - ap = auc_score(rec, prec) - plt.plot(rec, prec, color=COLORS[i % len(COLORS)], lw=2, - label=f'Class {classes[i]} (AP={ap:.3f})') + plt.xlabel('False Positive Rate',fontsize=13); plt.ylabel('True Positive Rate',fontsize=13) + plt.title(title,fontsize=14,fontweight='bold') + plt.legend(loc='lower right',fontsize=9); plt.grid(True,alpha=0.15); plt.tight_layout() + for ext,dpi in [('pdf',300),('png',150)]: + plt.savefig(os.path.join(rf,f'{prefix}.{ext}'),format=ext,bbox_inches='tight',dpi=dpi) + plt.close(); return ma, auc_d + + +def plot_multiclass_pr(y_true, y_proba, classes, title, prefix, rf): + n = len(classes) + yb = label_binarize(y_true, classes=classes) + if n == 2: yb = np.hstack([1 - yb, yb]) + plt.figure(figsize=(10,8)) + for i in range(n): + p,r,_ = precision_recall_curve(yb[:,i], y_proba[:,i]) + plt.plot(r, p, color=PLOT_COLORS[i%8], lw=2, + label=f'Class {classes[i]} (AP={auc_score(r,p):.3f})') plt.xlim([-0.02,1.02]); plt.ylim([-0.02,1.02]) - plt.xlabel('Recall', fontsize=13); plt.ylabel('Precision', fontsize=13) - plt.title(title, fontsize=14, fontweight='bold') - plt.legend(loc='lower left', fontsize=9) - plt.grid(True, alpha=0.15); plt.tight_layout() - plt.savefig(os.path.join(rf, f'{filepath_prefix}.pdf'), - format='pdf', bbox_inches='tight', dpi=300) - plt.savefig(os.path.join(rf, f'{filepath_prefix}.png'), - format='png', bbox_inches='tight', dpi=150) + plt.xlabel('Recall',fontsize=13); plt.ylabel('Precision',fontsize=13) + plt.title(title,fontsize=14,fontweight='bold') + plt.legend(loc='lower left',fontsize=9); plt.grid(True,alpha=0.15); plt.tight_layout() + for ext,dpi in [('pdf',300),('png',150)]: + plt.savefig(os.path.join(rf,f'{prefix}.{ext}'),format=ext,bbox_inches='tight',dpi=dpi) plt.close() -def plot_confusion_matrix(y_true, y_pred, classes, title, filepath_prefix, rf): - """Plot confusion matrix heatmap.""" +def plot_confusion_matrix(y_true, y_pred, classes, title, prefix, rf): cm = confusion_matrix(y_true, y_pred, labels=classes) - plt.figure(figsize=(max(6, len(classes)*1.2), max(5, len(classes)*1.0))) + plt.figure(figsize=(max(6,len(classes)*1.2), max(5,len(classes)*1.0))) sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=True, - xticklabels=classes, yticklabels=classes, - annot_kws={'fontsize': 11}) - plt.xlabel('Predicted', fontsize=12); plt.ylabel('True', fontsize=12) - plt.title(title, fontsize=13, fontweight='bold'); plt.tight_layout() - plt.savefig(os.path.join(rf, f'{filepath_prefix}.pdf'), - format='pdf', bbox_inches='tight', dpi=300) - plt.savefig(os.path.join(rf, f'{filepath_prefix}.png'), - format='png', bbox_inches='tight', dpi=150) - plt.close() - return cm - - -def plot_train_vs_val_roc(y_train, train_proba, y_val, val_proba, - classes, model_name, filepath_prefix, rf): - """Overlay train-set ROC and internal-validation (CV OOF) ROC.""" - n_classes = len(classes) - - def macro_roc(y_true, y_proba): - y_bin = label_binarize(y_true, classes=classes) - if n_classes == 2: - y_bin = np.hstack([1 - y_bin, y_bin]) - all_fpr = np.linspace(0, 1, 300) - mean_tpr = np.zeros_like(all_fpr) - for i in range(n_classes): - f, t, _ = roc_curve(y_bin[:, i], y_proba[:, i]) - mean_tpr += np.interp(all_fpr, f, t) - mean_tpr /= n_classes; mean_tpr[-1] = 1.0 - return all_fpr, mean_tpr, auc_score(all_fpr, mean_tpr) - - fpr_tr, tpr_tr, auc_tr = macro_roc(y_train, train_proba) - fpr_vl, tpr_vl, auc_vl = macro_roc(y_val, val_proba) - - plt.figure(figsize=(10, 8)) - plt.plot(fpr_tr, tpr_tr, color='#e41a1c', lw=2.5, + xticklabels=classes, yticklabels=classes, annot_kws={'fontsize':11}) + plt.xlabel('Predicted',fontsize=12); plt.ylabel('True',fontsize=12) + plt.title(title,fontsize=13,fontweight='bold'); plt.tight_layout() + for ext,dpi in [('pdf',300),('png',150)]: + plt.savefig(os.path.join(rf,f'{prefix}.{ext}'),format=ext,bbox_inches='tight',dpi=dpi) + plt.close(); return cm + + +def _macro_roc_curve(y_true, y_proba, classes): + n = len(classes) + yb = label_binarize(y_true, classes=classes) + if n == 2: yb = np.hstack([1 - yb, yb]) + all_fpr = np.linspace(0,1,300); mt = np.zeros_like(all_fpr) + for i in range(n): + f,t,_ = roc_curve(yb[:,i], y_proba[:,i]) + mt += np.interp(all_fpr, f, t) + mt /= n; mt[-1] = 1.0 + return all_fpr, mt, auc_score(all_fpr, mt) + + +def plot_train_vs_val_roc(y_train, tp, y_val, vp, classes, mn, prefix, rf): + fpr_tr,tpr_tr,auc_tr = _macro_roc_curve(y_train, tp, classes) + fpr_vl,tpr_vl,auc_vl = _macro_roc_curve(y_val, vp, classes) + plt.figure(figsize=(10,8)) + plt.plot(fpr_tr,tpr_tr,color='#e41a1c',lw=2.5, label=f'Training set (Macro AUC={auc_tr:.3f})') - plt.plot(fpr_vl, tpr_vl, color='#377eb8', lw=2.5, linestyle='--', + plt.plot(fpr_vl,tpr_vl,color='#377eb8',lw=2.5,linestyle='--', label=f'Internal validation / CV-OOF (Macro AUC={auc_vl:.3f})') - plt.plot([0,1],[0,1],'--',color='#cccccc',lw=1) + plt.plot([0,1],[0,1],'--',color='#ccc',lw=1) plt.xlim([-0.02,1.02]); plt.ylim([-0.02,1.02]) - plt.xlabel('False Positive Rate', fontsize=13) - plt.ylabel('True Positive Rate', fontsize=13) - plt.title(f'ROC — Train vs Internal Validation — {model_name}', - fontsize=14, fontweight='bold') - plt.legend(loc='lower right', fontsize=11) - plt.grid(True, alpha=0.15); plt.tight_layout() - plt.savefig(os.path.join(rf, f'{filepath_prefix}.pdf'), - format='pdf', bbox_inches='tight', dpi=300) - plt.savefig(os.path.join(rf, f'{filepath_prefix}.png'), - format='png', bbox_inches='tight', dpi=150) - plt.close() - return auc_tr, auc_vl - - -def plot_train_vs_val_pr(y_train, train_proba, y_val, val_proba, - classes, model_name, filepath_prefix, rf): - """Overlay train-set PR and internal-validation PR.""" - n_classes = len(classes) - - def macro_pr(y_true, y_proba): - y_bin = label_binarize(y_true, classes=classes) - if n_classes == 2: - y_bin = np.hstack([1 - y_bin, y_bin]) - all_rec = np.linspace(0, 1, 300) - mean_prec = np.zeros_like(all_rec) - for i in range(n_classes): - prec, rec, _ = precision_recall_curve(y_bin[:, i], y_proba[:, i]) - mean_prec += np.interp(all_rec, rec[::-1], prec[::-1]) - mean_prec /= n_classes - return all_rec, mean_prec, auc_score(all_rec, mean_prec) - - rec_tr, prec_tr, ap_tr = macro_pr(y_train, train_proba) - rec_vl, prec_vl, ap_vl = macro_pr(y_val, val_proba) - - plt.figure(figsize=(10, 8)) - plt.plot(rec_tr, prec_tr, color='#e41a1c', lw=2.5, - label=f'Training set (Macro AP={ap_tr:.3f})') - plt.plot(rec_vl, prec_vl, color='#377eb8', lw=2.5, linestyle='--', + plt.xlabel('False Positive Rate',fontsize=13); plt.ylabel('True Positive Rate',fontsize=13) + plt.title(f'ROC — Train vs Internal Validation — {mn}',fontsize=14,fontweight='bold') + plt.legend(loc='lower right',fontsize=11); plt.grid(True,alpha=0.15); plt.tight_layout() + for ext,dpi in [('pdf',300),('png',150)]: + plt.savefig(os.path.join(rf,f'{prefix}.{ext}'),format=ext,bbox_inches='tight',dpi=dpi) + plt.close(); return auc_tr, auc_vl + + +def plot_train_vs_val_pr(y_train, tp, y_val, vp, classes, mn, prefix, rf): + n = len(classes) + def macro_pr(yt, yp): + yb = label_binarize(yt, classes=classes) + if n==2: yb = np.hstack([1-yb, yb]) + ar = np.linspace(0,1,300); mp = np.zeros_like(ar) + for i in range(n): + p,r,_ = precision_recall_curve(yb[:,i], yp[:,i]) + mp += np.interp(ar, r[::-1], p[::-1]) + mp /= n; return ar, mp, auc_score(ar, mp) + ar_tr,mp_tr,ap_tr = macro_pr(y_train, tp) + ar_vl,mp_vl,ap_vl = macro_pr(y_val, vp) + plt.figure(figsize=(10,8)) + plt.plot(ar_tr,mp_tr,color='#e41a1c',lw=2.5,label=f'Training set (Macro AP={ap_tr:.3f})') + plt.plot(ar_vl,mp_vl,color='#377eb8',lw=2.5,linestyle='--', label=f'Internal validation / CV-OOF (Macro AP={ap_vl:.3f})') plt.xlim([-0.02,1.02]); plt.ylim([-0.02,1.02]) - plt.xlabel('Recall', fontsize=13); plt.ylabel('Precision', fontsize=13) - plt.title(f'PR — Train vs Internal Validation — {model_name}', - fontsize=14, fontweight='bold') - plt.legend(loc='lower left', fontsize=11) - plt.grid(True, alpha=0.15); plt.tight_layout() - plt.savefig(os.path.join(rf, f'{filepath_prefix}.pdf'), - format='pdf', bbox_inches='tight', dpi=300) - plt.savefig(os.path.join(rf, f'{filepath_prefix}.png'), - format='png', bbox_inches='tight', dpi=150) - plt.close() - return ap_tr, ap_vl + plt.xlabel('Recall',fontsize=13); plt.ylabel('Precision',fontsize=13) + plt.title(f'PR — Train vs Internal Validation — {mn}',fontsize=14,fontweight='bold') + plt.legend(loc='lower left',fontsize=11); plt.grid(True,alpha=0.15); plt.tight_layout() + for ext,dpi in [('pdf',300),('png',150)]: + plt.savefig(os.path.join(rf,f'{prefix}.{ext}'),format=ext,bbox_inches='tight',dpi=dpi) + plt.close(); return ap_tr, ap_vl # ============================================================================ -# Bootstrap AUC test [FIX-2 retained] +# [v4-3/4] Per-model SHAP + feature ablation with DeLong vs full-feature # ============================================================================ -def bootstrap_auc_test(y_true, proba_a, proba_b, classes, - n_bootstrap=2000, seed=42): - rng = np.random.RandomState(seed) - n = len(y_true) - n_classes = len(classes) +def run_shap_for_model(mn, mo, X, fnames, n_classes, SHAPSZ, RS): + """Compute mean |SHAP| importances for one model. Returns sorted DataFrame.""" + Xshap = X.values + ns = min(SHAPSZ, Xshap.shape[0]) + np.random.seed(RS) + sidx = np.random.choice(Xshap.shape[0], ns, replace=False) + Xs = Xshap[sidx] + if mn in ['RF','XGB','DT','AdaBoost']: + exp = shap.TreeExplainer(mo); sv = exp.shap_values(Xs) + else: + bg = Xs[np.random.choice(ns, min(50,ns), replace=False)] + exp = shap.KernelExplainer(lambda x, m=mo: m.predict_proba(x), bg) + sv = exp.shap_values(Xs) + # Robust shape handling + if isinstance(sv, list): + sv_abs = np.mean([np.abs(s) for s in sv], axis=0) + elif sv.ndim == 3: + sv_abs = np.mean(np.abs(sv), axis=(2 if sv.shape[2] <= sv.shape[1] else 1)) + else: + sv_abs = np.abs(sv) + fi = sv_abs.mean(axis=0) + if len(fi) > len(fnames): fi = fi[:len(fnames)] + elif len(fi) < len(fnames): fi = np.pad(fi,(0,len(fnames)-len(fi))) + return (pd.DataFrame({'Feature': fnames, 'Importance': fi}) + .sort_values('Importance', ascending=False) + .reset_index(drop=True)) + + +def run_ablation_for_model(mn, mcfg, bpd, X, y_mapped, class_indices, + n_classes, top_feats, skf, full_yt, full_yproba, + ALPHA=0.05, N_BOOT=2000): + """ + Incremental feature ablation for one model using its SHAP ranking. - def calc_macro_auc(yt, pa, pb): - try: - if n_classes == 2: - a1 = roc_auc_score(yt, pa[:, 1]) - a2 = roc_auc_score(yt, pb[:, 1]) - else: - a1 = roc_auc_score(yt, pa, multi_class='ovr', average='macro') - a2 = roc_auc_score(yt, pb, multi_class='ovr', average='macro') - return a1, a2 - except: - return 0.0, 0.0 + For each subset size N (1 … len(top_feats)): + - CV OOF predictions on top N features + - Collect OOF proba vectors (same fold splits → directly comparable with full) + - DeLong test: subset OOF AUC vs full-feature OOF AUC - auc_a, auc_b = calc_macro_auc(y_true, proba_a, proba_b) - observed_diff = auc_a - auc_b - diffs = [] - for _ in range(n_bootstrap): - idx = rng.choice(n, n, replace=True) - yt_b = y_true[idx]; pa_b = proba_a[idx]; pb_b = proba_b[idx] - if len(np.unique(yt_b)) < n_classes: - continue - a1, a2 = calc_macro_auc(yt_b, pa_b, pb_b) - diffs.append(a1 - a2) + opt_n = first N where DeLong p >= ALPHA (no significant difference from full) - if len(diffs) < 100: - return 1.0, auc_a, auc_b, -1, 1 + Returns dict with: + fcs, aucs, p_values, z_scores, ci_lows, ci_highs, + opt_n, opt_feats, opt_auc, delong_rows + """ + cf = mcfg[mn] + bp = bpd.get(mn, {}) if isinstance(bpd.get(mn,{}), dict) else {} + full_auc = _macro_auc_single(full_yt, full_yproba, n_classes) + + fcs=[]; aucs=[]; pvals=[]; zscores=[]; ci_los=[]; ci_his=[] + delong_rows=[] + + for nf in range(1, len(top_feats)+1): + Xsub = X[top_feats[:nf]] + # Collect OOF probas in fold order (same skf splits → aligned with full_yt) + sub_yt=[]; sub_yp=[] + for tri,tei in skf.split(Xsub, y_mapped): + mf = deepcopy(cf['model']) + if bp: mf.set_params(**bp) + mf.fit(Xsub.iloc[tri].values, y_mapped.iloc[tri]) + sub_yp.append(mf.predict_proba(Xsub.iloc[tei].values)) + sub_yt.extend(y_mapped.iloc[tei].tolist()) + + sub_yt = np.array(sub_yt) + sub_yproba = np.vstack(sub_yp) + + sub_auc = _macro_auc_single(sub_yt, sub_yproba, n_classes) + if np.isnan(sub_auc): sub_auc = 0.0 + + # DeLong: subset vs full (both evaluated on the same OOF indices) + p, a_sub, a_full, ci_lo, ci_hi, z = delong_test( + full_yt, sub_yproba, full_yproba, class_indices, + n_bootstrap=N_BOOT, seed=42) + + fcs.append(nf); aucs.append(sub_auc) + pvals.append(p); zscores.append(z) + ci_los.append(ci_lo); ci_his.append(ci_hi) + delong_rows.append({ + 'Model': mn, 'N_Features': nf, + 'Subset_AUC': sub_auc, 'Full_AUC': full_auc, + 'AUC_Diff': sub_auc - full_auc, + 'CI_95_Low': ci_lo, 'CI_95_High': ci_hi, + 'Z_score': z, 'P_value': p, + 'Significant': 'No' if p >= ALPHA else 'Yes', + }) - diffs = np.array(diffs) - # [FIX-2] H0: diff = 0, two-sided - p_value = np.mean(np.abs(diffs) >= np.abs(observed_diff)) - p_value = max(p_value, 1.0 / n_bootstrap) - ci_low = np.percentile(diffs, 2.5) - ci_high = np.percentile(diffs, 97.5) - return p_value, auc_a, auc_b, ci_low, ci_high + # opt_n = first N where p >= ALPHA (subset not significantly worse than full) + opt_n = len(top_feats) # fallback: use all + for i, p in enumerate(pvals): + if p >= ALPHA: + opt_n = i + 1; break + + return { + 'fcs': fcs, 'aucs': aucs, 'pvals': pvals, + 'zscores': zscores, 'ci_los': ci_los, 'ci_his': ci_his, + 'opt_n': opt_n, 'opt_feats': top_feats[:opt_n], + 'opt_auc': aucs[opt_n-1], 'full_auc': full_auc, + 'delong_rows': delong_rows, + } + + +def plot_combined_ablation(ablation_results, rf): + """ + [v4-6] One figure with all retained models' ablation curves. + Optimal point of each model is marked with a star. + """ + plt.figure(figsize=(12, 8)) + for i, (mn, res) in enumerate(ablation_results.items()): + c = MODEL_COLORS[i % 8] + on = res['opt_n'] + plt.plot(res['fcs'], res['aucs'], 'o-', color=c, lw=2, ms=4, + label=f'{mn} (opt={on}, AUC={res["opt_auc"]:.3f})') + plt.scatter([on], [res['opt_auc']], s=200, marker='*', + color=c, edgecolors='black', lw=1.5, zorder=6) + # horizontal full-AUC reference (same color, dashed) + plt.axhline(y=res['full_auc'], color=c, ls=':', lw=1, alpha=0.5) + + plt.xlabel('Number of Features', fontsize=13) + plt.ylabel('Macro AUC (CV-OOF)', fontsize=13) + plt.title('Feature Ablation — All Retained Models (★ = DeLong-optimal)', + fontsize=14, fontweight='bold') + plt.legend(loc='lower right', fontsize=9) + plt.grid(True, alpha=0.15); plt.tight_layout() + for ext,dpi in [('pdf',300),('png',150)]: + plt.savefig(os.path.join(rf,f'ablation_combined.{ext}'), + format=ext, bbox_inches='tight', dpi=dpi) + plt.close() + + +def plot_single_ablation(mn, res, rf): + """Per-model ablation plot with p-value annotation.""" + fcs = res['fcs']; aucs = res['aucs'] + pvals = res['pvals']; on = res['opt_n'] + full = res['full_auc'] + + fig, ax1 = plt.subplots(figsize=(12, 7)) + ax2 = ax1.twinx() + + ax1.plot(fcs, aucs, 'o-', color='#2563eb', lw=2, ms=5, label='Macro AUC') + ax1.scatter([on], [aucs[on-1]], s=250, marker='*', + color='#ef4444', edgecolors='black', lw=2, zorder=6, + label=f'Optimal N={on}') + ax1.axhline(y=full, color='gray', ls='--', lw=1.2, alpha=0.6, + label=f'Full-feature AUC={full:.3f}') + ax1.set_xlabel('Number of Features', fontsize=13) + ax1.set_ylabel('Macro AUC (CV-OOF)', fontsize=13, color='#2563eb') + ax1.tick_params(axis='y', labelcolor='#2563eb') + ax1.set_ylim([max(0, min(aucs) - 0.05), min(1.02, max(aucs) + 0.05)]) + + ax2.bar(fcs, [-np.log10(max(p, 1e-6)) for p in pvals], + color='#f59e0b', alpha=0.35, width=0.6, label='-log10(p)') + ax2.axhline(y=-np.log10(0.05), color='#ef4444', ls='-.', lw=1.2, + label='p=0.05 threshold') + ax2.set_ylabel('-log10(DeLong p-value)', fontsize=12, color='#b45309') + ax2.tick_params(axis='y', labelcolor='#b45309') + + lines1, labels1 = ax1.get_legend_handles_labels() + lines2, labels2 = ax2.get_legend_handles_labels() + ax1.legend(lines1 + lines2, labels1 + labels2, + loc='lower right', fontsize=9) + plt.title(f'Feature Ablation + DeLong Test — {mn} (★ Optimal={on})', + fontsize=14, fontweight='bold') + plt.tight_layout() + for ext,dpi in [('pdf',300),('png',150)]: + plt.savefig(os.path.join(rf,f'ablation_{mn}.{ext}'), + format=ext,bbox_inches='tight',dpi=dpi) + plt.close() # ============================================================================ -# [FIX-1] Model configs — XGBoost num_class constructed conditionally +# Model configs # ============================================================================ -ALL_MODEL_NAMES = ['RF', 'DT', 'KNN', 'XGB', 'AdaBoost', 'LR', 'NB', 'SVM'] +ALL_MODEL_NAMES = ['RF','DT','KNN','XGB','AdaBoost','LR','NB','SVM'] def get_models_config(selected, n_classes, rs=42): - xgb_kwargs = dict(random_state=rs, eval_metric='mlogloss', n_jobs=-1) + xgb_kw = dict(random_state=rs, eval_metric='mlogloss', n_jobs=-1) if n_classes > 2: - xgb_kwargs['objective'] = 'multi:softprob' - xgb_kwargs['num_class'] = n_classes + xgb_kw.update(objective='multi:softprob', num_class=n_classes) else: - xgb_kwargs['objective'] = 'binary:logistic' - xgb_kwargs['eval_metric'] = 'logloss' - + xgb_kw.update(objective='binary:logistic', eval_metric='logloss') cfg = { - 'RF': {'model': RandomForestClassifier(random_state=rs, n_jobs=-1), - 'params': {'n_estimators': [100,200], 'max_depth': [20,50], - 'min_samples_split': [2,5]}}, - 'DT': {'model': DecisionTreeClassifier(random_state=rs), - 'params': {'max_depth': [20,50], 'min_samples_split': [2,10], - 'criterion': ['gini','entropy']}}, - 'KNN': {'model': KNeighborsClassifier(n_jobs=-1), - 'params': {'n_neighbors': [3,5,7], - 'weights': ['uniform','distance']}}, - 'XGB': {'model': XGBClassifier(**xgb_kwargs), - 'params': {'n_estimators': [100,200], 'max_depth': [5,7], - 'learning_rate': [0.05,0.1]}}, - 'AdaBoost': {'model': AdaBoostClassifier(random_state=rs), - 'params': {'n_estimators': [50,100], - 'learning_rate': [0.1,0.5,1.0]}}, - 'LR': {'model': LogisticRegression(random_state=rs, n_jobs=-1, - max_iter=2000), - 'params': {'C': [0.1,1,10], 'solver': ['lbfgs']}}, - 'NB': {'model': GaussianNB(), - 'params': {'var_smoothing': [1e-9,1e-7,1e-5]}}, - 'SVM': {'model': SVC(probability=True, random_state=rs, - decision_function_shape='ovr'), - 'params': {'C': [1,10], 'kernel': ['rbf','linear']}}, + 'RF':{'model':RandomForestClassifier(random_state=rs,n_jobs=-1), + 'params':{'n_estimators':[100,200],'max_depth':[20,50],'min_samples_split':[2,5]}}, + 'DT':{'model':DecisionTreeClassifier(random_state=rs), + 'params':{'max_depth':[20,50],'min_samples_split':[2,10],'criterion':['gini','entropy']}}, + 'KNN':{'model':KNeighborsClassifier(n_jobs=-1), + 'params':{'n_neighbors':[3,5,7],'weights':['uniform','distance']}}, + 'XGB':{'model':XGBClassifier(**xgb_kw), + 'params':{'n_estimators':[100,200],'max_depth':[5,7],'learning_rate':[0.05,0.1]}}, + 'AdaBoost':{'model':AdaBoostClassifier(random_state=rs), + 'params':{'n_estimators':[50,100],'learning_rate':[0.1,0.5,1.0]}}, + 'LR':{'model':LogisticRegression(random_state=rs,n_jobs=-1,max_iter=2000), + 'params':{'C':[0.1,1,10],'solver':['lbfgs']}}, + 'NB':{'model':GaussianNB(), + 'params':{'var_smoothing':[1e-9,1e-7,1e-5]}}, + 'SVM':{'model':SVC(probability=True,random_state=rs,decision_function_shape='ovr'), + 'params':{'C':[1,10],'kernel':['rbf','linear']}}, } - return {k: v for k, v in cfg.items() if k in selected} + return {k:v for k,v in cfg.items() if k in selected} # ============================================================================ @@ -524,833 +602,609 @@ def get_models_config(selected, n_classes, rs=42): # ============================================================================ def run_pipeline( train_file, val_file1, val_file2, val_file3, n_classes_select, - selected_models, enable_tuning, - cv_folds, top_n_features, shap_sample_size, + selected_models, enable_tuning, cv_folds, top_n_features, shap_sample_size, progress=gr.Progress(track_tqdm=True), ): if train_file is None: return None, "❌ 请先上传训练集 CSV 文件" - sel = (selected_models if isinstance(selected_models, list) + sel = (selected_models if isinstance(selected_models,list) else [s.strip() for s in str(selected_models).split(",") if s.strip()]) if not sel: return None, "❌ 请至少选择一个模型" - RS = 42; CVF = int(cv_folds) - TOPN = int(top_n_features); SHAPSZ = int(shap_sample_size) - TUNING = bool(enable_tuning) + RS=42; CVF=int(cv_folds); TOPN=int(top_n_features) + SHAPSZ=int(shap_sample_size); TUNING=bool(enable_tuning); ALPHA=0.05 - L = [] - def log(m): L.append(str(m)) - - rf = tempfile.mkdtemp(prefix="ml_") + L=[]; log=lambda m: L.append(str(m)) + rf=tempfile.mkdtemp(prefix="ml_") try: - # ── Load Data ── + # ── Load ────────────────────────────────────────────────────────── progress(0.02, desc="📂 加载数据...") - log("━" * 60) - log(" 🧬 ML 多分类模型训练与评估系统 v3") - log("━" * 60) + log("━"*60); log(" 🧬 ML 多分类模型训练与评估系统 v4"); log("━"*60) - tp = (train_file if isinstance(train_file, str) - else getattr(train_file, 'name', str(train_file))) + tp = train_file if isinstance(train_file,str) else getattr(train_file,'name',str(train_file)) data = pd.read_csv(tp) - - y = data.iloc[:, 0] - col2 = data.iloc[:, 1] - col2_is_id = ((col2.dtype == 'object') or - (col2.nunique() / len(col2) > 0.5)) - if col2_is_id: - X = data.iloc[:, 2:] - log(f" 📋 CSV: Col1=Label, Col2=ID({data.columns[1]}), Col3+=Features") - else: - X = data.iloc[:, 1:] - log(f" 📋 CSV: Col1=Label, Col2+=Features (no ID column)") + y = data.iloc[:,0] + col2 = data.iloc[:,1] + col2_is_id = (col2.dtype=='object') or (col2.nunique()/len(col2)>0.5) + X = data.iloc[:,2:] if col2_is_id else data.iloc[:,1:] fnames = X.columns.tolist() + log(f" 📋 CSV: Col1=Label, {'Col2=ID, Col3+=Features' if col2_is_id else 'Col2+=Features'}") - user_n = int(str(n_classes_select).split(" ")[0]) + user_n = int(str(n_classes_select).split()[0]) detected_classes = sorted(y.unique()) - detected_classes = [int(c) if hasattr(c, 'item') else c - for c in detected_classes] - detected_n = len(detected_classes) - - if detected_n != user_n: - return None, ( - f"❌ 您选择了 {user_n} 分类,但数据中检测到 {detected_n} 个类别: " - f"{detected_classes}\n请将分类数修改为 {detected_n},或检查数据标签列") - - classes = detected_classes - n_classes = user_n - log(f" ✅ {n_classes} 分类 — 数据验证通过") - - label_map = {c: i for i, c in enumerate(classes)} - label_map_inv = {i: c for c, i in label_map.items()} - y_mapped = y.map(label_map) - class_indices = list(range(n_classes)) - - log(f" 📊 训练集: {X.shape[0]} 样本 × {X.shape[1]} 特征") - log(f" 🏷️ 类别数: {n_classes} 类 — {classes}") - log(f" 📊 分布: {dict(y.value_counts().sort_index())}") - log(f" 🤖 模型: {', '.join(sel)}") - log(f" 🔧 调优: {'开启' if TUNING else '关闭'} | CV: {CVF}折") - - if n_classes < 2 or n_classes > 8: - return None, f"❌ 仅支持 2~8 分类,当前检测到 {n_classes} 类" - - task_type = "Binary" if n_classes == 2 else f"{n_classes}-Class" - task_type_cn = "二分类" if n_classes == 2 else f"{n_classes}分类" + detected_classes = [int(c) if hasattr(c,'item') else c for c in detected_classes] + if len(detected_classes) != user_n: + return None,(f"❌ 选择 {user_n} 分类但检测到 {len(detected_classes)} 类: " + f"{detected_classes}") + + classes = detected_classes; n_classes=user_n + label_map = {c:i for i,c in enumerate(classes)} + y_mapped = y.map(label_map) + class_indices= list(range(n_classes)) + task_type = "Binary" if n_classes==2 else f"{n_classes}-Class" + task_type_cn = "二分类" if n_classes==2 else f"{n_classes}分类" + + log(f" ✅ {n_classes} 分类 | {X.shape[0]} 样本 × {X.shape[1]} 特征") + log(f" 🏷️ 类别: {classes} 分布: {dict(y.value_counts().sort_index())}") + log(f" 🤖 模型: {', '.join(sel)} 调优: {'开启' if TUNING else '关闭'} CV: {CVF}折") log(f" 📋 任务: {task_type_cn} ({task_type})") + if n_classes<2 or n_classes>8: + return None,f"❌ 仅支持 2~8 分类,检测到 {n_classes} 类" + mcfg = get_models_config(sel, n_classes, RS) skf = StratifiedKFold(n_splits=CVF, shuffle=True, random_state=RS) - COLORS = ['#2563eb','#f59e0b','#10b981','#ef4444', - '#8b5cf6','#ec4899','#06b6d4','#6b7280'] - - bpd = {} # best params - amr = {} # all model results (CV-OOF) - tms = {} # trained models (full data) - train_results = {} # metrics on full training set - - total = len(mcfg) + bpd={}; amr={}; tms={}; train_results={} + total=len(mcfg) # ── Train All Models ────────────────────────────────────────────── - for mi, (mn, cf) in enumerate(mcfg.items()): - pv = 0.05 + 0.35 * mi / total - progress(pv, desc=f"🏋️ [{mi+1}/{total}] 训练 {mn}...") - log(f"\n{'─'*50}") - log(f" 🔄 [{mi+1}/{total}] {mn}") - - Xv = X.values + for mi,(mn,cf) in enumerate(mcfg.items()): + progress(0.05+0.32*mi/total, desc=f"🏋️ [{mi+1}/{total}] 训练 {mn}...") + log(f"\n{'─'*50}"); log(f" 🔄 [{mi+1}/{total}] {mn}") + Xv=X.values - # Optional GridSearch if TUNING: log(f" ⏳ GridSearchCV (CV={CVF})...") - scoring = 'roc_auc_ovr' if n_classes > 2 else 'roc_auc' - gs = GridSearchCV(cf['model'], cf['params'], cv=skf, - scoring=scoring, n_jobs=-1, verbose=0) - gs.fit(Xv, y_mapped) - bp = gs.best_params_; bpd[mn] = bp + scoring='roc_auc_ovr' if n_classes>2 else 'roc_auc' + gs=GridSearchCV(cf['model'],cf['params'],cv=skf, + scoring=scoring,n_jobs=-1,verbose=0) + gs.fit(Xv,y_mapped); bp=gs.best_params_; bpd[mn]=bp log(f" ✓ 最佳CV Score: {gs.best_score_:.4f}") else: - bp = {}; bpd[mn] = "Default" + bp={}; bpd[mn]="Default" - # Fit final model on full training set - mdl = deepcopy(cf['model']) + mdl=deepcopy(cf['model']) if bp: mdl.set_params(**bp) - mdl.fit(Xv, y_mapped) - tms[mn] = mdl + mdl.fit(Xv,y_mapped); tms[mn]=mdl # Training-set metrics - train_proba_full = mdl.predict_proba(Xv) - train_pred_full = mdl.predict(Xv) - train_met = compute_multiclass_metrics( - y_mapped.values, train_pred_full, - train_proba_full, class_indices) - train_results[mn] = { - 'proba': train_proba_full, - 'pred': train_pred_full, - 'metrics': train_met, - } - - # ── CV evaluation (OOF = Internal Validation) ── - all_yt = []; all_yp = []; all_yproba = [] - fold_metrics = [] - - for fi, (tri, tei) in enumerate(skf.split(X, y_mapped), 1): - Xtr, Xte = X.iloc[tri].values, X.iloc[tei].values - ytr, yte = y_mapped.iloc[tri], y_mapped.iloc[tei] - mf = deepcopy(cf['model']) + tp_full=mdl.predict_proba(Xv); td_full=mdl.predict(Xv) + tm=compute_multiclass_metrics(y_mapped.values,td_full,tp_full,class_indices) + train_results[mn]={'proba':tp_full,'pred':td_full,'metrics':tm} + + # CV-OOF + all_yt=[]; all_yp=[]; all_yproba=[]; fold_metrics=[] + for fi,(tri,tei) in enumerate(skf.split(X,y_mapped),1): + mf=deepcopy(cf['model']) if bp: mf.set_params(**bp) - mf.fit(Xtr, ytr) - ypred = mf.predict(Xte) - yproba = mf.predict_proba(Xte) - all_yt.extend(yte) - all_yp.extend(ypred) + mf.fit(X.iloc[tri].values,y_mapped.iloc[tri]) + ypred=mf.predict(X.iloc[tei].values) + yproba=mf.predict_proba(X.iloc[tei].values) + all_yt.extend(y_mapped.iloc[tei]); all_yp.extend(ypred) all_yproba.append(yproba) - - fm = compute_multiclass_metrics(yte, ypred, yproba, class_indices) - # [v3-2] Extended fold row - fold_metrics.append({ - 'Fold': fi, - 'AUC': fm['Macro_AUC'], - 'Accuracy': fm['Accuracy'], - 'Sensitivity': fm['Macro_Sensitivity'], - 'Specificity': fm['Macro_Specificity'], - 'PPV': fm['Macro_PPV'], - 'NPV': fm['Macro_NPV'], - 'F1': fm['Macro_F1'], - 'Weighted_F1': fm['Weighted_F1'], - 'Kappa': fm['Kappa'], - }) - - all_yt = np.array(all_yt) - all_yp = np.array(all_yp) - all_yproba = np.vstack(all_yproba) - - # Build fold table with Mean row - fdf = pd.DataFrame(fold_metrics) - mean_row = { - col: (fdf[col].mean() if col != 'Fold' else 'Mean') - for col in fdf.columns + fm=compute_multiclass_metrics(y_mapped.iloc[tei],ypred,yproba,class_indices) + fold_metrics.append({'Fold':fi,'AUC':fm['Macro_AUC'], + 'Accuracy':fm['Accuracy'],'Sensitivity':fm['Macro_Sensitivity'], + 'Specificity':fm['Macro_Specificity'],'PPV':fm['Macro_PPV'], + 'NPV':fm['Macro_NPV'],'F1':fm['Macro_F1'], + 'Weighted_F1':fm['Weighted_F1'],'Kappa':fm['Kappa']}) + + all_yt=np.array(all_yt); all_yp=np.array(all_yp) + all_yproba=np.vstack(all_yproba) + fdf=pd.DataFrame(fold_metrics) + mr={c:(fdf[c].mean() if c!='Fold' else 'Mean') for c in fdf.columns} + fdf=pd.concat([fdf,pd.DataFrame([mr])],ignore_index=True) + oof_met=compute_multiclass_metrics(all_yt,all_yp,all_yproba,class_indices) + + amr[mn]={ + 'fold_df':fdf, + 'mean_auc':mr['AUC'],'mean_acc':mr['Accuracy'], + 'mean_sens':mr['Sensitivity'],'mean_spec':mr['Specificity'], + 'mean_ppv':mr['PPV'],'mean_npv':mr['NPV'], + 'mean_f1':mr['F1'],'mean_wf1':mr['Weighted_F1'], + 'mean_kappa':mr['Kappa'], + 'oof_metrics':oof_met, + 'all_yt':all_yt,'all_yp':all_yp,'all_yproba':all_yproba, } - fdf = pd.concat([fdf, pd.DataFrame([mean_row])], ignore_index=True) - - # OOF aggregate metrics (computed on concatenated OOF predictions) - oof_met = compute_multiclass_metrics( - all_yt, all_yp, all_yproba, class_indices) - - amr[mn] = { - 'fold_df': fdf, - 'mean_auc': mean_row['AUC'], - 'mean_acc': mean_row['Accuracy'], - 'mean_sens': mean_row['Sensitivity'], - 'mean_spec': mean_row['Specificity'], - 'mean_ppv': mean_row['PPV'], - 'mean_npv': mean_row['NPV'], - 'mean_f1': mean_row['F1'], - 'mean_wf1': mean_row['Weighted_F1'], - 'mean_kappa': mean_row['Kappa'], - 'oof_metrics': oof_met, - 'all_yt': all_yt, - 'all_yp': all_yp, - 'all_yproba': all_yproba, - } - - # [v3-7] Log all key metrics - tm = train_met; vm = mean_row log(f" ✅ [Train] AUC={tm['Macro_AUC']:.4f} Acc={tm['Accuracy']:.4f} " f"Sens={tm['Macro_Sensitivity']:.4f} Spec={tm['Macro_Specificity']:.4f} " f"PPV={tm['Macro_PPV']:.4f} NPV={tm['Macro_NPV']:.4f} " f"F1={tm['Macro_F1']:.4f} Kappa={tm['Kappa']:.4f}") - log(f" ✅ [CV-OOF] AUC={vm['AUC']:.4f} Acc={vm['Accuracy']:.4f} " - f"Sens={vm['Sensitivity']:.4f} Spec={vm['Specificity']:.4f} " - f"PPV={vm['PPV']:.4f} NPV={vm['NPV']:.4f} " - f"F1={vm['F1']:.4f} Kappa={vm['Kappa']:.4f}") + log(f" ✅ [CV-OOF] AUC={mr['AUC']:.4f} Acc={mr['Accuracy']:.4f} " + f"Sens={mr['Sensitivity']:.4f} Spec={mr['Specificity']:.4f} " + f"PPV={mr['PPV']:.4f} NPV={mr['NPV']:.4f} " + f"F1={mr['F1']:.4f} Kappa={mr['Kappa']:.4f}") - mnames = list(amr.keys()); nm = len(mnames) - log(f"\n{'━'*60}") - log(f" ✅ {nm} 个模型训练完成") + mnames=list(amr.keys()); nm=len(mnames) + log(f"\n{'━'*60}"); log(f" ✅ {nm} 个模型训练完成") - # ── Training-set ROC / PR / CM for every model ─────────────────── - progress(0.40, desc="📈 训练集曲线...") - log(f"\n 📈 绘制训练集 ROC / PR / CM...") + # ── Training-set plots ──────────────────────────────────────────── + progress(0.39, desc="📈 训练集曲线...") + log(f"\n 📈 绘制训练集 ROC/PR/CM...") for mn in mnames: - tr = train_results[mn] - tm = tr['metrics'] - plot_multiclass_roc( - y_mapped.values, tr['proba'], class_indices, + tr=train_results[mn]; tm=tr['metrics'] + plot_multiclass_roc(y_mapped.values,tr['proba'],class_indices, f'ROC (Train) — {mn} ({task_type}, AUC={tm["Macro_AUC"]:.3f})', - f'roc_train_{mn}', rf) - plot_multiclass_pr( - y_mapped.values, tr['proba'], class_indices, - f'PR (Train) — {mn} ({task_type})', - f'pr_train_{mn}', rf) - plot_confusion_matrix( - y_mapped.values, tr['pred'], class_indices, - f'CM (Train) — {mn} (Acc={tm["Accuracy"]:.3f})', - f'cm_train_{mn}', rf) - - # Combined training-set ROC (all models, macro) - plt.figure(figsize=(10, 8)) - for i, mn in enumerate(mnames): - tr = train_results[mn] - y_bin = label_binarize(y_mapped.values, classes=class_indices) - if n_classes == 2: y_bin = np.hstack([1 - y_bin, y_bin]) - all_fpr = np.linspace(0, 1, 200); mean_tpr = np.zeros_like(all_fpr) - for c in range(n_classes): - f, t, _ = roc_curve(y_bin[:, c], tr['proba'][:, c]) - mean_tpr += np.interp(all_fpr, f, t) - mean_tpr /= n_classes; mean_tpr[-1] = 1.0 - ma = auc_score(all_fpr, mean_tpr) - plt.plot(all_fpr, mean_tpr, color=COLORS[i%8], lw=2.5, - label=f'{mn} (Macro AUC={ma:.3f})') + f'roc_train_{mn}',rf) + plot_multiclass_pr(y_mapped.values,tr['proba'],class_indices, + f'PR (Train) — {mn} ({task_type})',f'pr_train_{mn}',rf) + plot_confusion_matrix(y_mapped.values,tr['pred'],class_indices, + f'CM (Train) — {mn} (Acc={tm["Accuracy"]:.3f})',f'cm_train_{mn}',rf) + + # Combined training ROC + plt.figure(figsize=(10,8)) + for i,mn in enumerate(mnames): + tr=train_results[mn] + fpr,tpr,ma=_macro_roc_curve(y_mapped.values,tr['proba'],class_indices) + plt.plot(fpr,tpr,color=MODEL_COLORS[i%8],lw=2.5,label=f'{mn} (AUC={ma:.3f})') plt.plot([0,1],[0,1],'--',color='#ccc',lw=1) plt.xlim([-0.02,1.02]); plt.ylim([-0.02,1.02]) plt.xlabel('FPR',fontsize=13); plt.ylabel('TPR',fontsize=13) - plt.title(f'ROC (Train) — All Models ({task_type})', - fontsize=14, fontweight='bold') - plt.legend(loc='lower right',fontsize=10) - plt.grid(True,alpha=0.15); plt.tight_layout() - plt.savefig(os.path.join(rf,'roc_train_all.pdf'), - format='pdf',bbox_inches='tight',dpi=300) - plt.savefig(os.path.join(rf,'roc_train_all.png'), - format='png',bbox_inches='tight',dpi=150) + plt.title(f'ROC (Train) — All Models ({task_type})',fontsize=14,fontweight='bold') + plt.legend(loc='lower right',fontsize=10); plt.grid(True,alpha=0.15); plt.tight_layout() + for ext,dpi in [('pdf',300),('png',150)]: + plt.savefig(os.path.join(rf,f'roc_train_all.{ext}'),format=ext,bbox_inches='tight',dpi=dpi) plt.close() - # ── CV-OOF ROC / PR / CM ───────────────────────────────────────── - progress(0.44, desc="📈 内部验证ROC曲线...") - log(f"\n 📈 绘制内部验证(CV-OOF) ROC / PR / CM...") + # ── CV-OOF plots ───────────────────────────────────────────────── + progress(0.43, desc="📈 内部验证曲线...") + log(f"\n 📈 绘制CV-OOF ROC/PR/CM...") for mn in mnames: - r = amr[mn] - plot_multiclass_roc( - r['all_yt'], r['all_yproba'], class_indices, + r=amr[mn] + plot_multiclass_roc(r['all_yt'],r['all_yproba'],class_indices, f'ROC (Internal Val) — {mn} ({task_type}, AUC={r["mean_auc"]:.3f})', - f'roc_val_{mn}', rf) - - # Combined CV-OOF ROC - plt.figure(figsize=(10, 8)) - for i, mn in enumerate(mnames): - r = amr[mn] - y_bin = label_binarize(r['all_yt'], classes=class_indices) - if n_classes == 2: y_bin = np.hstack([1 - y_bin, y_bin]) - all_fpr = np.linspace(0, 1, 200); mean_tpr = np.zeros_like(all_fpr) - for c in range(n_classes): - f, t, _ = roc_curve(y_bin[:, c], r['all_yproba'][:, c]) - mean_tpr += np.interp(all_fpr, f, t) - mean_tpr /= n_classes; mean_tpr[-1] = 1.0 - ma = auc_score(all_fpr, mean_tpr) - plt.plot(all_fpr, mean_tpr, color=COLORS[i%8], lw=2.5, - label=f'{mn} (Macro AUC={ma:.3f})') + f'roc_val_{mn}',rf) + plot_multiclass_pr(r['all_yt'],r['all_yproba'],class_indices, + f'PR (Internal Val) — {mn} ({task_type})',f'pr_val_{mn}',rf) + plot_confusion_matrix(r['all_yt'],r['all_yp'],class_indices, + f'CM (Internal Val) — {mn} (Acc={r["mean_acc"]:.3f})',f'cm_val_{mn}',rf) + + # Combined val ROC + plt.figure(figsize=(10,8)) + for i,mn in enumerate(mnames): + r=amr[mn] + fpr,tpr,ma=_macro_roc_curve(r['all_yt'],r['all_yproba'],class_indices) + plt.plot(fpr,tpr,color=MODEL_COLORS[i%8],lw=2.5,label=f'{mn} (AUC={ma:.3f})') plt.plot([0,1],[0,1],'--',color='#ccc',lw=1) plt.xlim([-0.02,1.02]); plt.ylim([-0.02,1.02]) plt.xlabel('FPR',fontsize=13); plt.ylabel('TPR',fontsize=13) - plt.title(f'ROC (Internal Val / CV-OOF) — All Models ({task_type})', - fontsize=14, fontweight='bold') - plt.legend(loc='lower right',fontsize=10) - plt.grid(True,alpha=0.15); plt.tight_layout() - plt.savefig(os.path.join(rf,'roc_val_all.pdf'), - format='pdf',bbox_inches='tight',dpi=300) - plt.savefig(os.path.join(rf,'roc_val_all.png'), - format='png',bbox_inches='tight',dpi=150) + plt.title(f'ROC (Internal Val) — All Models ({task_type})',fontsize=14,fontweight='bold') + plt.legend(loc='lower right',fontsize=10); plt.grid(True,alpha=0.15); plt.tight_layout() + for ext,dpi in [('pdf',300),('png',150)]: + plt.savefig(os.path.join(rf,f'roc_val_all.{ext}'),format=ext,bbox_inches='tight',dpi=dpi) plt.close() - progress(0.48, desc="📈 PR曲线...") - for mn in mnames: - r = amr[mn] - plot_multiclass_pr( - r['all_yt'], r['all_yproba'], class_indices, - f'PR (Internal Val) — {mn} ({task_type})', - f'pr_val_{mn}', rf) + # ── [v4-1/2] Step 1: DeLong test — all models vs best ──────────── + progress(0.47, desc="🔬 Step1: DeLong模型筛选...") + ref_mn = max(amr, key=lambda x: amr[x]['mean_auc']) + ref_auc = amr[ref_mn]['mean_auc'] + log(f"\n{'━'*60}") + log(f" 🔬 Step 1 — DeLong检验 (参照模型: {ref_mn}, AUC={ref_auc:.4f})") + log(f" α=0.05,p≥0.05 → 保留(与最佳无统计学差异)") - progress(0.51, desc="📊 混淆矩阵...") - for mn in mnames: - r = amr[mn] - plot_confusion_matrix( - r['all_yt'], r['all_yp'], class_indices, - f'CM (Internal Val) — {mn} (Acc={r["mean_acc"]:.3f})', - f'cm_val_{mn}', rf) - - # ── Bootstrap AUC Test ──────────────────────────────────────────── - progress(0.54, desc="🔬 Bootstrap AUC 检验...") - best_mn = max(amr, key=lambda x: amr[x]['mean_auc']) - best_auc = amr[best_mn]['mean_auc'] - log(f"\n 🏆 最佳模型: {best_mn} (Macro AUC={best_auc:.4f})") - log(f" 🔬 Bootstrap 检验 (n=2000, α=0.05)...") - - ALPHA = 0.05 - bootstrap_results = [] - retained = [best_mn] + delong_step1_rows=[]; delong_retained=[ref_mn] for om in mnames: - if om == best_mn: + if om==ref_mn: + delong_step1_rows.append({ + 'Model':om,'AUC':amr[om]['mean_auc'], + 'vs_Best':'-','AUC_Diff':0, + 'CI_95_Low':0,'CI_95_High':0, + 'Z_score':0,'P_value':1.0, + 'Decision':'Best (reference)', + }) continue - p_val, auc_a, auc_b, ci_lo, ci_hi = bootstrap_auc_test( - amr[best_mn]['all_yt'], - amr[best_mn]['all_yproba'], + p,a_ref,a_om,ci_lo,ci_hi,z = delong_test( + amr[ref_mn]['all_yt'], + amr[ref_mn]['all_yproba'], amr[om]['all_yproba'], class_indices, n_bootstrap=2000) - dec = "Retained" if p_val >= ALPHA else "Excluded" - if p_val >= ALPHA: - retained.append(om) - bootstrap_results.append({ - 'Model_A': best_mn, 'AUC_A': auc_a, - 'Model_B': om, 'AUC_B': auc_b, - 'AUC_Diff': auc_a - auc_b, - 'CI_95_Low': ci_lo, 'CI_95_High': ci_hi, - 'P_value': p_val, 'Decision': dec, + dec = "Retained" if p>=ALPHA else "Excluded" + if p>=ALPHA: delong_retained.append(om) + delong_step1_rows.append({ + 'Model':om,'AUC':amr[om]['mean_auc'], + 'vs_Best':ref_mn,'AUC_Diff':a_ref-a_om, + 'CI_95_Low':ci_lo,'CI_95_High':ci_hi, + 'Z_score':z,'P_value':p,'Decision':dec, }) - log(f" {best_mn} vs {om}: ΔAUC={auc_a-auc_b:+.4f} " + log(f" {ref_mn} vs {om}: ΔAUC={a_ref-a_om:+.4f} " f"95%CI=[{ci_lo:+.4f},{ci_hi:+.4f}] " - f"P={p_val:.4f} → {dec}") - - bootstrap_df = (pd.DataFrame(bootstrap_results) - .sort_values('P_value', ascending=False) - if bootstrap_results else pd.DataFrame()) - log(f" ✅ 保留 {len(retained)}/{nm} 个模型: {', '.join(retained)}") - - # ── Best model: Train vs Internal Val overlay ───────────────────── - progress(0.57, desc="📈 Train vs Val 对比图...") - log(f"\n 📈 最佳模型 {best_mn}: Train vs Internal Validation 对比...") - auc_tr_b, auc_vl_b = plot_train_vs_val_roc( - y_mapped.values, train_results[best_mn]['proba'], - amr[best_mn]['all_yt'], amr[best_mn]['all_yproba'], - class_indices, best_mn, f'roc_train_vs_val_{best_mn}', rf) - ap_tr_b, ap_vl_b = plot_train_vs_val_pr( - y_mapped.values, train_results[best_mn]['proba'], - amr[best_mn]['all_yt'], amr[best_mn]['all_yproba'], - class_indices, best_mn, f'pr_train_vs_val_{best_mn}', rf) - log(f" ROC — Train AUC={auc_tr_b:.4f} / Val AUC={auc_vl_b:.4f}") - log(f" PR — Train AP={ap_tr_b:.4f} / Val AP={ap_vl_b:.4f}") - - # ── SHAP ────────────────────────────────────────────────────────── - progress(0.60, desc="🔥 SHAP分析...") - log(f"\n 🔥 SHAP特征分析 (保留模型中 Top 3)...") - shap_imp = {} - models_for_shap = sorted(retained, - key=lambda x: amr[x]['mean_auc'], - reverse=True)[:3] - - for si, mn in enumerate(models_for_shap): - progress(0.60 + 0.10 * si / max(len(models_for_shap), 1), + f"Z={z:+.3f} P={p:.4f} → {dec}") + + delong_step1_df = (pd.DataFrame(delong_step1_rows) + .sort_values('AUC',ascending=False)) + log(f" ✅ Step1保留 {len(delong_retained)}/{nm} 个模型: " + f"{', '.join(delong_retained)}") + + # Train vs Val overlay for best model + progress(0.50, desc="📈 Train vs Val 对比...") + log(f"\n 📈 参照模型 {ref_mn}: Train vs Internal Validation...") + auc_tr_r,auc_vl_r=plot_train_vs_val_roc( + y_mapped.values,train_results[ref_mn]['proba'], + amr[ref_mn]['all_yt'],amr[ref_mn]['all_yproba'], + class_indices,ref_mn,f'roc_train_vs_val_{ref_mn}',rf) + ap_tr_r,ap_vl_r=plot_train_vs_val_pr( + y_mapped.values,train_results[ref_mn]['proba'], + amr[ref_mn]['all_yt'],amr[ref_mn]['all_yproba'], + class_indices,ref_mn,f'pr_train_vs_val_{ref_mn}',rf) + log(f" ROC Train={auc_tr_r:.4f} / Val={auc_vl_r:.4f}") + log(f" PR Train={ap_tr_r:.4f} / Val={ap_vl_r:.4f}") + + # ── [v4-3] SHAP for all delong_retained models ─────────────────── + progress(0.53, desc="🔥 SHAP分析...") + log(f"\n 🔥 SHAP — 对所有保留模型计算特征重要性...") + shap_imp={} + for si,mn in enumerate(delong_retained): + progress(0.53+0.08*si/max(len(delong_retained),1), desc=f"🔥 SHAP: {mn}...") - mo = tms[mn]; Xshap = X.values - ns = min(SHAPSZ, Xshap.shape[0]) - np.random.seed(RS) - sidx = np.random.choice(Xshap.shape[0], ns, replace=False) - Xs = Xshap[sidx] try: - if mn in ['RF', 'XGB', 'DT', 'AdaBoost']: - exp = shap.TreeExplainer(mo) - sv = exp.shap_values(Xs) - else: - bg = Xs[np.random.choice(ns, min(50, ns), replace=False)] - exp = shap.KernelExplainer( - lambda x, m=mo: m.predict_proba(x), bg) - sv = exp.shap_values(Xs) - - # [FIX-3] Robust SHAP shape handling - if isinstance(sv, list): - sv_abs = np.mean([np.abs(s) for s in sv], axis=0) - elif sv.ndim == 3: - if sv.shape[2] == n_classes: - sv_abs = np.mean(np.abs(sv), axis=2) - elif sv.shape[1] == n_classes: - sv_abs = np.mean(np.abs(sv), axis=1) - else: - sv_abs = np.abs(sv).mean(axis=-1) - else: - sv_abs = np.abs(sv) - - fi = sv_abs.mean(axis=0) - if len(fi) > len(fnames): fi = fi[:len(fnames)] - elif len(fi) < len(fnames): - fi = np.pad(fi, (0, len(fnames) - len(fi))) - - idf = (pd.DataFrame({'Feature': fnames, 'Importance': fi}) - .sort_values('Importance', ascending=False)) - shap_imp[mn] = idf - - plt.figure(figsize=(10, max(6, TOPN * 0.3))) - top_df = idf.head(TOPN).iloc[::-1] - plt.barh(top_df['Feature'], top_df['Importance'], - color='#2563eb', alpha=0.8) - plt.xlabel('Mean |SHAP|', fontsize=12) + idf=run_shap_for_model(mn,tms[mn],X,fnames,n_classes,SHAPSZ,RS) + shap_imp[mn]=idf + plt.figure(figsize=(10,max(6,TOPN*0.3))) + top_df=idf.head(TOPN).iloc[::-1] + plt.barh(top_df['Feature'],top_df['Importance'],color='#2563eb',alpha=0.8) + plt.xlabel('Mean |SHAP|',fontsize=12) plt.title(f'SHAP Feature Importance — {mn} (Top {TOPN})', - fontsize=13, fontweight='bold') + fontsize=13,fontweight='bold') plt.tight_layout() - plt.savefig(os.path.join(rf, f'shap_{mn}.pdf'), - format='pdf', bbox_inches='tight') - plt.savefig(os.path.join(rf, f'shap_{mn}.png'), - format='png', bbox_inches='tight', dpi=150) + for ext,dpi in [('pdf',300),('png',150)]: + plt.savefig(os.path.join(rf,f'shap_{mn}.{ext}'), + format=ext,bbox_inches='tight',dpi=dpi) plt.close() - log(f" ✅ {mn} Top3: " - f"{', '.join(idf.head(3)['Feature'].tolist())}") + log(f" ✅ {mn} Top3: {', '.join(idf.head(3)['Feature'].tolist())}") except Exception as e: log(f" ⚠ {mn} SHAP失败: {e}") - # ── Feature Ablation ────────────────────────────────────────────── - progress(0.72, desc="🧪 特征消融...") - log(f"\n 🧪 特征消融 (仅最佳模型 {best_mn})...") - ablation_data = None - if best_mn in shap_imp: - imp_df = shap_imp[best_mn] - top_feats = imp_df.head(TOPN)['Feature'].tolist() - fcs = []; aucs_a = [] - - for nf in range(1, len(top_feats) + 1): - Xsub = X[top_feats[:nf]] - fold_aucs = [] - for tri, tei in skf.split(Xsub, y_mapped): - mf = deepcopy(mcfg[best_mn]['model']) - bp2 = bpd.get(best_mn, {}) - if isinstance(bp2, dict) and bp2: - mf.set_params(**bp2) - mf.fit(Xsub.iloc[tri].values, y_mapped.iloc[tri]) - yproba_f = mf.predict_proba(Xsub.iloc[tei].values) - yte_f = y_mapped.iloc[tei] - try: - a = (roc_auc_score(yte_f, yproba_f[:, 1]) - if n_classes == 2 else - roc_auc_score(yte_f, yproba_f, - multi_class='ovr', average='macro')) - except: - a = 0.0 - fold_aucs.append(a) - fcs.append(nf); aucs_a.append(np.mean(fold_aucs)) - - full_auc = amr[best_mn]['mean_auc'] - opt_n = len(top_feats) - for i, a in enumerate(aucs_a): - if a >= full_auc * 0.95: - opt_n = i + 1; break - - ablation_data = { - 'fcs': fcs, 'aucs': aucs_a, 'feats': top_feats, - 'opt_n': opt_n, 'opt_feats': top_feats[:opt_n] - } - log(f" ✅ 最优特征数: {opt_n} " - f"(AUC={aucs_a[opt_n-1]:.4f} vs Full={full_auc:.4f})") - - plt.figure(figsize=(10, 7)) - plt.plot(fcs, aucs_a, 'o-', color='#2563eb', lw=2, ms=5) - plt.scatter([opt_n], [aucs_a[opt_n-1]], s=200, marker='*', - color='#ef4444', edgecolors='black', lw=2, zorder=5) - plt.axhline(y=full_auc, color='gray', ls='--', lw=1, alpha=0.5, - label=f'Full AUC={full_auc:.3f}') - plt.xlabel('Number of Features', fontsize=13) - plt.ylabel('Macro AUC', fontsize=13) - plt.title(f'Feature Ablation — {best_mn} (★ Optimal={opt_n})', - fontsize=14, fontweight='bold') - plt.legend(fontsize=11); plt.grid(True, alpha=0.15); plt.tight_layout() - plt.savefig(os.path.join(rf, 'ablation.pdf'), - format='pdf', bbox_inches='tight') - plt.savefig(os.path.join(rf, 'ablation.png'), - format='png', bbox_inches='tight', dpi=150) - plt.close() + # ── [v4-4] Step 2: Feature ablation with DeLong for each retained model + progress(0.62, desc="🧪 Step2: 特征消融 + DeLong检验...") + log(f"\n{'━'*60}") + log(f" 🧪 Step 2 — 特征消融 + DeLong检验(每个保留模型)") + log(f" 判定准则:首个 DeLong p≥0.05 的特征数 = 最优特征数") + + ablation_results={} # mn → ablation dict + all_delong_step2_rows=[] + + models_with_shap=[mn for mn in delong_retained if mn in shap_imp] + for ai,mn in enumerate(models_with_shap): + progress(0.62+0.15*ai/max(len(models_with_shap),1), + desc=f"🧪 消融: {mn}...") + top_feats=shap_imp[mn].head(TOPN)['Feature'].tolist() + log(f"\n 🔬 {mn} 消融中 (Top{TOPN} 特征)...") + + res=run_ablation_for_model( + mn, mcfg, bpd, X, y_mapped, class_indices, n_classes, + top_feats, skf, + amr[mn]['all_yt'], amr[mn]['all_yproba'], + ALPHA=ALPHA, N_BOOT=2000) + + ablation_results[mn]=res + all_delong_step2_rows.extend(res['delong_rows']) + + log(f" Full-feature AUC={res['full_auc']:.4f}") + for idx,nf in enumerate(res['fcs']): + sig='*' if res['pvals'][idx] 0.5)) - Xe = ed.iloc[:, 2:] if vcol2_is_id else ed.iloc[:, 1:] - - ye = ye_raw.map(label_map) + progress(0.80, desc="🧪 外部验证...") + log(f"\n{'━'*60}"); log(f" 🧪 外部验证 ({len(val_files_list)} 个验证集)") + for vi,vf in enumerate(val_files_list,1): + vp=vf if isinstance(vf,str) else getattr(vf,'name',str(vf)) + ed=pd.read_csv(vp); ye_raw=ed.iloc[:,0] + vcol2=ed.iloc[:,1] + Xe = ed.iloc[:,2:] if ((vcol2.dtype=='object') or + (vcol2.nunique()/len(vcol2)>0.5)) else ed.iloc[:,1:] + ye=ye_raw.map(label_map) if ye.isna().any(): - log(f" ⚠ 验证集 {vi} 含有训练集中不存在的标签,已跳过") - continue - - log(f"\n 📊 验证集 {vi}: {Xe.shape[0]} 样本, " - f"{os.path.basename(vp)}") - - Xes = Xe[final_feats]; Xtf = X[final_feats] - fm = deepcopy(mcfg[best_mn]['model']) - bp3 = bpd[best_mn] - if isinstance(bp3, dict) and bp3: - fm.set_params(**bp3) - fm.fit(Xtf.values, y_mapped) - yep = fm.predict_proba(Xes.values) - yed = fm.predict(Xes.values) - ye_np = ye.values - - ext_met = compute_multiclass_metrics( - ye_np, yed, yep, class_indices) - em = ext_met - log(f" ✅ AUC={em['Macro_AUC']:.4f} " - f"Acc={em['Accuracy']:.4f} " - f"Sens={em['Macro_Sensitivity']:.4f} " - f"Spec={em['Macro_Specificity']:.4f} " - f"PPV={em['Macro_PPV']:.4f} " - f"NPV={em['Macro_NPV']:.4f} " - f"F1={em['Macro_F1']:.4f} " - f"Kappa={em['Kappa']:.4f}") - - sfx = f'_ext{vi}' if len(val_files_list) > 1 else '_ext' - tag = f'Validation {vi}' if len(val_files_list) > 1 else 'External' - - plot_multiclass_roc(ye_np, yep, class_indices, - f'ROC — {tag} ({best_mn})', f'roc{sfx}', rf) - plot_multiclass_pr(ye_np, yep, class_indices, - f'PR — {tag} ({best_mn})', f'pr{sfx}', rf) - plot_confusion_matrix(ye_np, yed, class_indices, - f'CM — {tag} ({best_mn})', f'cm{sfx}', rf) - - # [v3-5] Extended external validation Excel - with pd.ExcelWriter( - os.path.join(rf, f'validation{sfx}.xlsx'), - engine='openpyxl' - ) as w: - # Macro metrics row - macro_row = {'Model': best_mn, - 'N_Features': len(final_feats)} - macro_row.update(metrics_to_flat_row(em)) - pd.DataFrame([macro_row]).to_excel( - w, sheet_name='Metrics_Macro', index=False) - # Per-class detail - per_class_df(em, class_indices).to_excel( - w, sheet_name='Metrics_PerClass', index=False) - pd.DataFrame({'Feature': final_feats}).to_excel( - w, sheet_name='Features', index=False) + log(f" ⚠ 验证集{vi}含未知标签,已跳过"); continue + log(f"\n 📊 验证集{vi}: {Xe.shape[0]} 样本, {os.path.basename(vp)}") + Xes=Xe[final_feats]; Xtf=X[final_feats] + fm=deepcopy(mcfg[best_mn]['model']) + bp3=bpd[best_mn] + if isinstance(bp3,dict) and bp3: fm.set_params(**bp3) + fm.fit(Xtf.values,y_mapped) + yep=fm.predict_proba(Xes.values); yed=fm.predict(Xes.values) + em=compute_multiclass_metrics(ye.values,yed,yep,class_indices) + log(f" ✅ AUC={em['Macro_AUC']:.4f} Acc={em['Accuracy']:.4f} " + f"Sens={em['Macro_Sensitivity']:.4f} Spec={em['Macro_Specificity']:.4f} " + f"PPV={em['Macro_PPV']:.4f} NPV={em['Macro_NPV']:.4f} " + f"F1={em['Macro_F1']:.4f} Kappa={em['Kappa']:.4f}") + sfx=f'_ext{vi}' if len(val_files_list)>1 else '_ext' + tag=f'Validation {vi}' if len(val_files_list)>1 else 'External' + plot_multiclass_roc(ye.values,yep,class_indices, + f'ROC — {tag} ({best_mn})',f'roc{sfx}',rf) + plot_multiclass_pr(ye.values,yep,class_indices, + f'PR — {tag} ({best_mn})',f'pr{sfx}',rf) + plot_confusion_matrix(ye.values,yed,class_indices, + f'CM — {tag} ({best_mn})',f'cm{sfx}',rf) + with pd.ExcelWriter(os.path.join(rf,f'validation{sfx}.xlsx'), + engine='openpyxl') as w: + mr2={'Model':best_mn,'N_Features':len(final_feats)} + mr2.update(metrics_to_flat_row(em)) + pd.DataFrame([mr2]).to_excel(w,sheet_name='Metrics_Macro',index=False) + per_class_df(em,class_indices).to_excel(w,sheet_name='Metrics_PerClass',index=False) + pd.DataFrame({'Feature':final_feats}).to_excel(w,sheet_name='Features',index=False) # ── Save Results ────────────────────────────────────────────────── - progress(0.92, desc="💾 保存结果...") + progress(0.90, desc="💾 保存结果...") log(f"\n 💾 保存结果...") - with pd.ExcelWriter( - os.path.join(rf, 'model_evaluation.xlsx'), - engine='openpyxl' - ) as w: - - # 1. Per-fold CV results for every model [v3-2 extended columns] - for mn, r in amr.items(): - r['fold_df'].to_excel(w, sheet_name=mn, index=False) - - # 2. Summary — Internal Validation (CV-OOF) [v3-3 all metrics] - sd = [] - for mn, r in amr.items(): - row = { - 'Model': mn, - 'Retained': 'Yes' if mn in retained else 'No', - 'Best': 'Best' if mn == best_mn else '', - } - row.update({ - 'AUC': r['mean_auc'], - 'Accuracy': r['mean_acc'], - 'Sensitivity': r['mean_sens'], - 'Specificity': r['mean_spec'], - 'PPV': r['mean_ppv'], - 'NPV': r['mean_npv'], - 'F1': r['mean_f1'], - 'Weighted_F1': r['mean_wf1'], - 'Kappa': r['mean_kappa'], - }) + with pd.ExcelWriter(os.path.join(rf,'model_evaluation.xlsx'),engine='openpyxl') as w: + + # Per-fold tables + for mn,r in amr.items(): + r['fold_df'].to_excel(w,sheet_name=mn[:31],index=False) + + # Summary CV-OOF + sd=[] + for mn,r in amr.items(): + row={'Model':mn, + 'DeLong_Retained':'Yes' if mn in delong_retained else 'No', + 'Final_Model':'Yes' if mn==best_mn else ''} + row.update({'AUC':r['mean_auc'],'Accuracy':r['mean_acc'], + 'Sensitivity':r['mean_sens'],'Specificity':r['mean_spec'], + 'PPV':r['mean_ppv'],'NPV':r['mean_npv'], + 'F1':r['mean_f1'],'Weighted_F1':r['mean_wf1'],'Kappa':r['mean_kappa']}) sd.append(row) - (pd.DataFrame(sd) - .sort_values('AUC', ascending=False) - .to_excel(w, sheet_name='Summary_InternalVal', index=False)) + (pd.DataFrame(sd).sort_values('AUC',ascending=False) + .to_excel(w,sheet_name='Summary_InternalVal',index=False)) - # 3. Train vs Internal Validation [v3-3 all metrics] - comparison_rows = [] + # Train vs Val comparison + comp=[] for mn in amr: - tr_m = train_results[mn]['metrics'] - vm = amr[mn] - row = { - 'Model': mn, - 'Train_AUC': tr_m['Macro_AUC'], - 'Train_Accuracy': tr_m['Accuracy'], - 'Train_Sensitivity': tr_m['Macro_Sensitivity'], - 'Train_Specificity': tr_m['Macro_Specificity'], - 'Train_PPV': tr_m['Macro_PPV'], - 'Train_NPV': tr_m['Macro_NPV'], - 'Train_F1': tr_m['Macro_F1'], - 'Train_Kappa': tr_m['Kappa'], - 'Val_AUC': vm['mean_auc'], - 'Val_Accuracy': vm['mean_acc'], - 'Val_Sensitivity': vm['mean_sens'], - 'Val_Specificity': vm['mean_spec'], - 'Val_PPV': vm['mean_ppv'], - 'Val_NPV': vm['mean_npv'], - 'Val_F1': vm['mean_f1'], - 'Val_Kappa': vm['mean_kappa'], - 'AUC_Gap': tr_m['Macro_AUC'] - vm['mean_auc'], - 'Retained': 'Yes' if mn in retained else 'No', - 'Best': 'Best' if mn == best_mn else '', - } - comparison_rows.append(row) - (pd.DataFrame(comparison_rows) - .sort_values('Val_AUC', ascending=False) - .to_excel(w, sheet_name='Train_vs_InternalVal', index=False)) - - # 4. Bootstrap test - if len(bootstrap_df) > 0: - bootstrap_df.to_excel(w, sheet_name='Bootstrap_Test', - index=False) - - # 5. [v3-4] Per-class detail for EVERY model (train + val) + tr_m=train_results[mn]['metrics']; vm=amr[mn] + comp.append({'Model':mn, + 'Train_AUC':tr_m['Macro_AUC'],'Train_Acc':tr_m['Accuracy'], + 'Train_Sens':tr_m['Macro_Sensitivity'],'Train_Spec':tr_m['Macro_Specificity'], + 'Train_PPV':tr_m['Macro_PPV'],'Train_NPV':tr_m['Macro_NPV'], + 'Train_F1':tr_m['Macro_F1'],'Train_Kappa':tr_m['Kappa'], + 'Val_AUC':vm['mean_auc'],'Val_Acc':vm['mean_acc'], + 'Val_Sens':vm['mean_sens'],'Val_Spec':vm['mean_spec'], + 'Val_PPV':vm['mean_ppv'],'Val_NPV':vm['mean_npv'], + 'Val_F1':vm['mean_f1'],'Val_Kappa':vm['mean_kappa'], + 'AUC_Gap':tr_m['Macro_AUC']-vm['mean_auc'], + 'DeLong_Retained':'Yes' if mn in delong_retained else 'No', + 'Final_Model':'Yes' if mn==best_mn else ''}) + (pd.DataFrame(comp).sort_values('Val_AUC',ascending=False) + .to_excel(w,sheet_name='Train_vs_InternalVal',index=False)) + + # [v4-1] DeLong Step1 + delong_step1_df.to_excel(w,sheet_name='DeLong_Step1_ModelSel',index=False) + + # [v4-4] DeLong Step2 — full ablation table + if all_delong_step2_rows: + (pd.DataFrame(all_delong_step2_rows) + .to_excel(w,sheet_name='DeLong_Step2_Ablation',index=False)) + + # [v4-5] Model selection summary + if not selection_df.empty: + selection_df.sort_values(['Opt_N','Full_AUC'], + ascending=[True,False]).to_excel( + w,sheet_name='ModelSelection_Summary',index=False) + + # Per-class detail for every model for mn in mnames: - # Val (OOF) - oof_pc = per_class_df(amr[mn]['oof_metrics'], class_indices) - sheet_v = f'{mn}_Val_PerClass' - if len(sheet_v) > 31: sheet_v = sheet_v[:31] - oof_pc.to_excel(w, sheet_name=sheet_v, index=False) - - # Train - tr_pc = per_class_df(train_results[mn]['metrics'], class_indices) - sheet_t = f'{mn}_Train_PerClass' - if len(sheet_t) > 31: sheet_t = sheet_t[:31] - tr_pc.to_excel(w, sheet_name=sheet_t, index=False) - - # Ablation Excel - if ablation_data: - with pd.ExcelWriter( - os.path.join(rf, 'feature_ablation.xlsx'), - engine='openpyxl' - ) as w: - pd.DataFrame({ - 'N': ablation_data['fcs'], - 'AUC': ablation_data['aucs'] - }).to_excel(w, sheet_name='Ablation', index=False) - for mn, idf in shap_imp.items(): - idf.to_excel(w, sheet_name=f'{mn}_Imp', index=False) - - # ── best_params.txt [v3-6] all metrics ────────────────────────── - with open(os.path.join(rf, 'best_params.txt'), 'w', + s=f'{mn}_Val_PC'; s=s[:31] + per_class_df(amr[mn]['oof_metrics'],class_indices).to_excel( + w,sheet_name=s,index=False) + s=f'{mn}_Train_PC'; s=s[:31] + per_class_df(train_results[mn]['metrics'],class_indices).to_excel( + w,sheet_name=s,index=False) + + # SHAP importance Excel + if shap_imp: + with pd.ExcelWriter(os.path.join(rf,'shap_importance.xlsx'), + engine='openpyxl') as w: + for mn,idf in shap_imp.items(): + idf.to_excel(w,sheet_name=mn[:31],index=False) + + # Ablation summary Excel + if ablation_results: + with pd.ExcelWriter(os.path.join(rf,'feature_ablation.xlsx'), + engine='openpyxl') as w: + for mn,res in ablation_results.items(): + df_abl=pd.DataFrame({ + 'N_Features':res['fcs'],'AUC':res['aucs'], + 'P_value':res['pvals'],'Z_score':res['zscores'], + 'CI_Low':res['ci_los'],'CI_High':res['ci_his'], + 'Significant':[('Yes' if p7} {'Acc':>7} {'Sens':>7} {'Spec':>7} " - f"{'PPV':>7} {'NPV':>7} {'F1':>7} {'Kappa':>7}\n") - f.write("-" * 65 + "\n") - - def fmt_row(label, m_auc, m_acc, m_sens, m_spec, - m_ppv, m_npv, m_f1, m_kappa): - return (f"{label:<14} " - f"{m_auc:>7.4f} {m_acc:>7.4f} {m_sens:>7.4f} " - f"{m_spec:>7.4f} {m_ppv:>7.4f} {m_npv:>7.4f} " - f"{m_f1:>7.4f} {m_kappa:>7.4f}\n") - - for mn in mcfg: - status = ("* Best" if mn == best_mn - else ("Retained" if mn in retained else "Excluded")) - tr_m = train_results[mn]['metrics'] - vm = amr[mn] - f.write(f"\nModel: {mn} | {status}\n") - f.write(fmt_row( - " Train", - tr_m['Macro_AUC'], tr_m['Accuracy'], - tr_m['Macro_Sensitivity'], tr_m['Macro_Specificity'], - tr_m['Macro_PPV'], tr_m['Macro_NPV'], - tr_m['Macro_F1'], tr_m['Kappa'])) - f.write(fmt_row( - " CV-OOF", - vm['mean_auc'], vm['mean_acc'], - vm['mean_sens'], vm['mean_spec'], - vm['mean_ppv'], vm['mean_npv'], - vm['mean_f1'], vm['mean_kappa'])) - f.write(f" AUC Gap: " - f"{tr_m['Macro_AUC'] - vm['mean_auc']:+.4f}\n") - bp = bpd[mn] - if isinstance(bp, dict): - for k, v in bp.items(): - f.write(f" {k}: {v}\n") - else: - f.write(f" Params: {bp}\n") - - if len(bootstrap_df) > 0: - f.write("\n" + "=" * 65 + "\n") - f.write("Bootstrap AUC Comparison Results\n") - f.write("=" * 65 + "\n") - for _, row in bootstrap_df.iterrows(): - f.write(f" {row['Model_A']} vs {row['Model_B']}: " - f"dAUC={row['AUC_Diff']:+.4f} " - f"95%CI=[{row['CI_95_Low']:+.4f}," - f"{row['CI_95_High']:+.4f}] " - f"P={row['P_value']:.4f} -> {row['Decision']}\n") - if ablation_data: - f.write(f"\nOptimal Features ({ablation_data['opt_n']}): " - f"{', '.join(ablation_data['opt_feats'])}\n") - - # Save best model pickle + f.write("="*70+"\n") + f.write("ML Multi-Class Pipeline v4 — Model Selection Report\n") + f.write("="*70+"\n\n") + f.write(f"Task: {task_type} ({n_classes} classes: {classes})\n") + f.write(f"Training samples: {X.shape[0]} Features: {X.shape[1]}\n\n") + + f.write("─"*70+"\n") + f.write("STEP 1: DeLong Test — Model Screening\n") + f.write(f"Reference (best): {ref_mn} OOF AUC={ref_auc:.4f}\n") + f.write(f"{'Model':<12}{'AUC':>8}{'ΔAUC':>9}{'CI_Low':>10}" + f"{'CI_High':>10}{'Z':>8}{'P':>9}{'Decision':<12}\n") + f.write("─"*70+"\n") + for _,row in delong_step1_df.iterrows(): + f.write(f"{row['Model']:<12}{row['AUC']:>8.4f}" + f"{row['AUC_Diff']:>+9.4f}{row['CI_95_Low']:>+10.4f}" + f"{row['CI_95_High']:>+10.4f}{row['Z_score']:>+8.3f}" + f"{row['P_value']:>9.4f} {row['Decision']}\n") + f.write(f"\nRetained: {', '.join(delong_retained)}\n\n") + + f.write("─"*70+"\n") + f.write("STEP 2: Feature Ablation + DeLong Test (subset vs full)\n") + f.write(f"{'Model':<12}{'Full_AUC':>10}{'Opt_N':>7}" + f"{'Opt_AUC':>10}{'p@Opt':>10}\n") + f.write("─"*70+"\n") + for mn in models_with_shap: + res=ablation_results[mn] + f.write(f"{mn:<12}{res['full_auc']:>10.4f}{res['opt_n']:>7d}" + f"{res['opt_auc']:>10.4f}" + f"{res['pvals'][res['opt_n']-1]:>10.4f}\n") + + f.write("\n"+"─"*70+"\n") + f.write("STEP 3: Final Model Selection\n") + f.write("Rule: ① min(Opt_N); ② tie-break → max(Full_AUC)\n") + f.write(f"{'Model':<12}{'Opt_N':>7}{'Opt_AUC':>10}{'Full_AUC':>11}\n") + f.write("─"*70+"\n") + if not selection_df.empty: + for _,row in selection_df.sort_values( + ['Opt_N','Full_AUC'],ascending=[True,False]).iterrows(): + marker='★ WINNER' if row['Model']==best_mn else '' + f.write(f"{row['Model']:<12}{row['Opt_N']:>7d}" + f"{row['Opt_AUC']:>10.4f}" + f"{row['Full_AUC']:>11.4f} {marker}\n") + f.write(f"\nFINAL MODEL: {best_mn}\n") + f.write(f"Optimal features ({len(final_feats)}): " + f"{', '.join(final_feats)}\n") + + # Save model pickle.dump({ - 'model_name': best_mn, - 'model': tms[best_mn], - 'best_params': bpd[best_mn], - 'classes': classes, - 'n_classes': n_classes, - 'label_map': label_map, - 'features': final_feats, - 'task_type': task_type, - }, open(os.path.join(rf, f'model_{best_mn}.pkl'), 'wb')) + 'model_name':best_mn,'model':tms[best_mn], + 'best_params':bpd[best_mn],'classes':classes, + 'n_classes':n_classes,'label_map':label_map, + 'features':final_feats,'task_type':task_type, + },open(os.path.join(rf,f'model_{best_mn}.pkl'),'wb')) # ── ZIP ─────────────────────────────────────────────────────────── progress(0.97, desc="📦 打包ZIP...") - zp = os.path.join(tempfile.gettempdir(), - f"ml_results_{int(time.time())}_{os.getpid()}.zip") - with zipfile.ZipFile(zp, 'w', zipfile.ZIP_DEFLATED) as zf: - for root, _, files in os.walk(rf): + zp=os.path.join(tempfile.gettempdir(), + f"ml_results_{int(time.time())}_{os.getpid()}.zip") + with zipfile.ZipFile(zp,'w',zipfile.ZIP_DEFLATED) as zf: + for root,_,files in os.walk(rf): for fn in files: - zf.write(os.path.join(root, fn), - os.path.relpath(os.path.join(root, fn), rf)) + zf.write(os.path.join(root,fn), + os.path.relpath(os.path.join(root,fn),rf)) + nf=sum(len(f) for _,_,f in os.walk(rf)) + shutil.rmtree(rf,ignore_errors=True); gc.collect() - nf = sum(len(f) for _, _, f in os.walk(rf)) - shutil.rmtree(rf, ignore_errors=True); gc.collect() - - tm_b = train_results[best_mn]['metrics'] log(f"\n{'━'*60}") - log(f" 🎉 分析完成!共 {nf} 个文件已打包") - log(f" 📋 Task: {task_type} | Best Model: {best_mn}") - log(f" 📊 Train — AUC={tm_b['Macro_AUC']:.4f} " - f"Acc={tm_b['Accuracy']:.4f} " - f"Sens={tm_b['Macro_Sensitivity']:.4f} " - f"Spec={tm_b['Macro_Specificity']:.4f} " - f"PPV={tm_b['Macro_PPV']:.4f} NPV={tm_b['Macro_NPV']:.4f} " - f"F1={tm_b['Macro_F1']:.4f}") - log(f" 📊 CV-OOF — AUC={best_auc:.4f} " - f"Acc={amr[best_mn]['mean_acc']:.4f} " - f"Sens={amr[best_mn]['mean_sens']:.4f} " - f"Spec={amr[best_mn]['mean_spec']:.4f} " - f"PPV={amr[best_mn]['mean_ppv']:.4f} " - f"NPV={amr[best_mn]['mean_npv']:.4f} " - f"F1={amr[best_mn]['mean_f1']:.4f}") + log(f" 🎉 分析完成!{nf} 个文件已打包") + log(f" 🏆 最终模型: {best_mn} 最优特征数: {len(final_feats)}") + log(f" 📊 全特征OOF AUC: {amr[best_mn]['mean_auc']:.4f}") + log(f" 📊 消融后OOF AUC: {ablation_results[best_mn]['opt_auc']:.4f}" + if best_mn in ablation_results else "") log(f"{'━'*60}") progress(1.0, desc="✅ 完成!") - return zp, "\n".join(L) + return zp,"\n".join(L) except Exception as e: - log(f"\n❌ 错误: {e}") - log(traceback.format_exc()) - if os.path.exists(rf): shutil.rmtree(rf, ignore_errors=True) + log(f"\n❌ 错误: {e}"); log(traceback.format_exc()) + if os.path.exists(rf): shutil.rmtree(rf,ignore_errors=True) gc.collect() - return None, "\n".join(L) + return None,"\n".join(L) # ============================================================================ # Gradio UI # ============================================================================ -CUSTOM_CSS = """ -.header-banner { - background: linear-gradient(135deg, #0a2463 0%, #1e3a7a 40%, #2554a8 100%); - border-radius: 16px; padding: 28px 36px; margin-bottom: 20px; - box-shadow: 0 8px 32px rgba(0,0,0,0.18); position: relative; overflow: hidden; -} -.header-banner::before { - content: ''; position: absolute; top: -50%; right: -20%; - width: 400px; height: 400px; - background: radial-gradient(circle, rgba(96,165,250,0.2) 0%, transparent 70%); - border-radius: 50%; -} -.header-banner img { max-height: 52px; border-radius: 6px; margin-bottom: 12px; } -.header-banner h1 { color: #e2e8f0 !important; font-size: 1.7em !important; - margin: 4px 0 6px 0 !important; font-weight: 700 !important; } -.header-banner p { color: #94a3b8 !important; font-size: 0.92em !important; - margin: 2px 0 !important; line-height: 1.6; } -.header-banner .credit { color: #64748b !important; font-size: 0.82em !important; - margin-top: 10px !important; - border-top: 1px solid rgba(148,163,184,0.15); padding-top: 10px; } -.section-title { - background: linear-gradient(90deg, #2563eb 0%, #3b82f6 100%); - color: white !important; padding: 8px 16px; border-radius: 8px; - font-size: 0.95em !important; font-weight: 600 !important; - margin: 12px 0 8px 0; } -.pipeline-box { - background: linear-gradient(135deg, #f0f9ff 0%, #e0f2fe 100%); - border: 1px solid #bae6fd; border-radius: 12px; - padding: 14px 18px; margin: 8px 0; font-size: 0.88em; } -.pipeline-box code { background: #2563eb; color: white; padding: 2px 8px; - border-radius: 4px; font-size: 0.85em; margin: 0 2px; } -.log-area textarea { - font-family: 'Menlo','Consolas',monospace !important; - font-size: 12.5px !important; line-height: 1.5 !important; - background: #0f172a !important; color: #e2e8f0 !important; - border-radius: 10px !important; padding: 16px !important; } -.gradio-container { max-width: 1280px !important; } -footer { display: none !important; } +CUSTOM_CSS=""" +.header-banner{background:linear-gradient(135deg,#0a2463 0%,#1e3a7a 40%,#2554a8 100%); +border-radius:16px;padding:28px 36px;margin-bottom:20px; +box-shadow:0 8px 32px rgba(0,0,0,0.18);position:relative;overflow:hidden;} +.header-banner::before{content:'';position:absolute;top:-50%;right:-20%; +width:400px;height:400px; +background:radial-gradient(circle,rgba(96,165,250,0.2) 0%,transparent 70%);border-radius:50%;} +.header-banner img{max-height:52px;border-radius:6px;margin-bottom:12px;} +.header-banner h1{color:#e2e8f0!important;font-size:1.7em!important; +margin:4px 0 6px 0!important;font-weight:700!important;} +.header-banner p{color:#94a3b8!important;font-size:0.92em!important; +margin:2px 0!important;line-height:1.6;} +.header-banner .credit{color:#64748b!important;font-size:0.82em!important; +margin-top:10px!important;border-top:1px solid rgba(148,163,184,0.15);padding-top:10px;} +.section-title{background:linear-gradient(90deg,#2563eb 0%,#3b82f6 100%); +color:white!important;padding:8px 16px;border-radius:8px; +font-size:0.95em!important;font-weight:600!important;margin:12px 0 8px 0;} +.pipeline-box{background:linear-gradient(135deg,#f0f9ff 0%,#e0f2fe 100%); +border:1px solid #bae6fd;border-radius:12px;padding:14px 18px;margin:8px 0;font-size:0.88em;} +.pipeline-box code{background:#2563eb;color:white;padding:2px 8px; +border-radius:4px;font-size:0.85em;margin:0 2px;} +.log-area textarea{font-family:'Menlo','Consolas',monospace!important; +font-size:12.5px!important;line-height:1.5!important; +background:#0f172a!important;color:#e2e8f0!important; +border-radius:10px!important;padding:16px!important;} +.gradio-container{max-width:1280px!important;} +footer{display:none!important;} """ with gr.Blocks( - title="ML 多分类模型平台 — 复旦大学附属眼耳鼻喉科医院", - theme=gr.themes.Soft(primary_hue="blue", secondary_hue="slate", - neutral_hue="slate"), + title="ML 多分类模型平台 v4 — 复旦大学附属眼耳鼻喉科医院", + theme=gr.themes.Soft(primary_hue="blue",secondary_hue="slate",neutral_hue="slate"), css=CUSTOM_CSS, ) as demo: @@ -1358,101 +1212,86 @@ with gr.Blocks(
Logo -

🧬 ML 多分类模型训练与评估平台

-

支持 2~8 分类 · 上传 CSV 即可完成全流程分析

-

评估指标:AUC · Accuracy · Sensitivity · Specificity · PPV · NPV · F1 · Kappa

+

🧬 ML 多分类模型训练与评估平台 v4

+

支持 2~8 分类 · DeLong检验模型筛选 · 逐模型特征消融 · 自动最优模型决策

+

指标:AUC · Accuracy · Sensitivity · Specificity · PPV · NPV · F1 · Kappa

复旦大学附属眼耳鼻喉科医院 · 检验科 · 任俊

""") gr.HTML("""
- 📋 流程: - 训练+训练集评估交叉验证(OOF) → - Train vs Val对比SHAP → - 特征消融外部验证 -  |  - 指标: - AUC · Accuracy · Sensitivity · Specificity · PPV · NPV · F1 · Kappa(宏平均+逐类) + 📋 三步流程: + Step1 DeLong筛模型Step2 逐模型消融+DeLong vs全特征 → + Step3 特征最少优先选最终模型  |  CSV: 第1列=标签(整数), 第2列=ID, 第3列起=特征 +  |  + 输出: ROC/PR/CM(训练+验证) · SHAP · 消融曲线 · DeLong检验表 · 选模报告
""") with gr.Row(equal_height=False): with gr.Column(scale=5): gr.HTML('
📂 数据上传
') - train_file = gr.File(label="训练集 CSV(必需)", file_types=[".csv"]) + train_file=gr.File(label="训练集 CSV(必需)",file_types=[".csv"]) gr.HTML('

' '验证集可选,支持同时上传 1~3 个

') with gr.Row(): - val_file1 = gr.File(label="验证集 1(可选)", - file_types=[".csv"], scale=1) - val_file2 = gr.File(label="验证集 2(可选)", - file_types=[".csv"], scale=1) - val_file3 = gr.File(label="验证集 3(可选)", - file_types=[".csv"], scale=1) + val_file1=gr.File(label="验证集 1(可选)",file_types=[".csv"],scale=1) + val_file2=gr.File(label="验证集 2(可选)",file_types=[".csv"],scale=1) + val_file3=gr.File(label="验证集 3(可选)",file_types=[".csv"],scale=1) gr.HTML('
🏷️ 分类设置
') - n_classes_select = gr.Dropdown( - choices=["2 类(二分类)","3 类","4 类","5 类", - "6 类","7 类","8 类"], - value="2 类(二分类)", label="选择分类数", - info="请根据数据标签列的类别数选择,系统将自动验证是否匹配", - ) + n_classes_select=gr.Dropdown( + choices=["2 类(二分类)","3 类","4 类","5 类","6 类","7 类","8 类"], + value="2 类(二分类)",label="选择分类数", + info="请根据数据标签列的类别数选择,系统将自动验证是否匹配") gr.HTML('
🤖 模型选择
') - model_selector = gr.Dropdown( - choices=ALL_MODEL_NAMES, value=ALL_MODEL_NAMES, - multiselect=True, label="选择模型(均支持多分类)", - info=("RF=随机森林 DT=决策树 KNN=K近邻 XGB=XGBoost " - "AdaBoost LR=逻辑回归 NB=朴素贝叶斯 SVM=支持向量机"), - ) + model_selector=gr.Dropdown( + choices=ALL_MODEL_NAMES,value=ALL_MODEL_NAMES,multiselect=True, + label="选择模型(均支持多分类)", + info="RF=随机森林 DT=决策树 KNN=K近邻 XGB=XGBoost " + "AdaBoost LR=逻辑回归 NB=朴素贝叶斯 SVM=支持向量机") with gr.Row(): - btn_all = gr.Button("🔘 全选", size="sm", variant="secondary") - btn_tree = gr.Button("🌲 树模型", size="sm", variant="secondary") - btn_linear = gr.Button("📐 线性模型", size="sm", variant="secondary") - btn_top4 = gr.Button("⚡ 经典四模型", size="sm", variant="secondary") - btn_all.click(lambda: ALL_MODEL_NAMES, outputs=model_selector) - btn_tree.click(lambda: ['RF','DT','XGB','AdaBoost'], outputs=model_selector) - btn_linear.click(lambda: ['LR','SVM','NB'], outputs=model_selector) - btn_top4.click(lambda: ['RF','XGB','LR','SVM'], outputs=model_selector) + btn_all =gr.Button("🔘 全选", size="sm",variant="secondary") + btn_tree =gr.Button("🌲 树模型", size="sm",variant="secondary") + btn_linear=gr.Button("📐 线性模型", size="sm",variant="secondary") + btn_top4 =gr.Button("⚡ 经典四模型",size="sm",variant="secondary") + btn_all.click(lambda:ALL_MODEL_NAMES, outputs=model_selector) + btn_tree.click(lambda:['RF','DT','XGB','AdaBoost'],outputs=model_selector) + btn_linear.click(lambda:['LR','SVM','NB'], outputs=model_selector) + btn_top4.click(lambda:['RF','XGB','LR','SVM'], outputs=model_selector) gr.HTML('
⚙️ 参数配置
') - enable_tuning = gr.Checkbox( - value=False, + enable_tuning=gr.Checkbox(value=False, label="启用超参数调优 (GridSearchCV) ⚠️ 开启后运行时间显著增加") with gr.Row(): - cv_folds = gr.Slider(3, 10, value=5, step=1, - label="交叉验证折数") - top_n = gr.Slider(5, 50, value=20, step=1, - label="SHAP 前 N 个特征") - shap_sz = gr.Slider(30, 200, value=80, step=10, - label="SHAP 采样数量") - - run_btn = gr.Button("🚀 开始分析", variant="primary", size="lg") + cv_folds=gr.Slider(3,10,value=5,step=1,label="交叉验证折数") + top_n =gr.Slider(5,50,value=20,step=1,label="SHAP 前 N 个特征(消融上限)") + shap_sz=gr.Slider(30,200,value=80,step=10,label="SHAP 采样数量") + run_btn=gr.Button("🚀 开始分析",variant="primary",size="lg") with gr.Column(scale=5): gr.HTML('
📋 运行日志
') - log_output = gr.Textbox( - label="", lines=24, max_lines=50, interactive=False, - placeholder=("点击「开始分析」后,日志将在此显示...\n" - "支持 2~8 分类。\n" - "评估指标:AUC / Accuracy / Sensitivity / " - "Specificity / PPV / NPV / F1 / Kappa"), - elem_classes="log-area", - ) + log_output=gr.Textbox( + label="",lines=28,max_lines=60,interactive=False, + placeholder=("点击「开始分析」后日志将在此显示...\n\n" + "Step 1: DeLong检验 — 筛选与最佳模型无显著差异的模型\n" + "Step 2: 各保留模型SHAP+特征消融+DeLong(子集 vs 全特征)\n" + "Step 3: 特征数最少者为最终模型;相同则取AUC最高者"), + elem_classes="log-area") gr.HTML('
⬇️ 结果下载
') - zip_output = gr.File(label="分析结果 ZIP 压缩包") + zip_output=gr.File(label="分析结果 ZIP 压缩包") run_btn.click( fn=run_pipeline, - inputs=[train_file, val_file1, val_file2, val_file3, - n_classes_select, model_selector, enable_tuning, - cv_folds, top_n, shap_sz], - outputs=[zip_output, log_output], - api_name="run", - ) + inputs=[train_file,val_file1,val_file2,val_file3, + n_classes_select,model_selector,enable_tuning, + cv_folds,top_n,shap_sz], + outputs=[zip_output,log_output], + api_name="run") # ============================================================================ @@ -1460,28 +1299,22 @@ with gr.Blocks( # ============================================================================ from datetime import datetime -ACCOUNTS = { - "admin": {"password": "admin123", "expires": None}, - "renjun": {"password": "fudan2025", "expires": "2027-12-31"}, - "guest": {"password": "guest888", "expires": "2027-06-30"}, +ACCOUNTS={ + "admin": {"password":"admin123", "expires":None}, + "renjun": {"password":"fudan2025", "expires":"2027-12-31"}, + "guest": {"password":"guest888", "expires":"2027-06-30"}, } -def auth_fn(username, password): - user = ACCOUNTS.get(username) - if not user or user["password"] != password: return False +def auth_fn(username,password): + user=ACCOUNTS.get(username) + if not user or user["password"]!=password: return False if user["expires"]: try: - if datetime.now() > datetime.strptime(user["expires"], "%Y-%m-%d"): - return False + if datetime.now()>datetime.strptime(user["expires"],"%Y-%m-%d"): return False except: return False return True demo.queue() -demo.launch( - server_name="0.0.0.0", - server_port=7860, - auth=auth_fn, - auth_message=("🔐 复旦大学附属眼耳鼻喉科医院 · ML多分类分析平台\n" - "请输入账号和密码登录"), - ssr_mode=False, -) \ No newline at end of file +demo.launch(server_name="0.0.0.0",server_port=7860,auth=auth_fn, + auth_message="🔐 复旦大学附属眼耳鼻喉科医院 · ML多分类分析平台 v4\n请输入账号和密码登录", + ssr_mode=False) \ No newline at end of file