Spaces:

xiaohy
/

MathTutor-MIA-Defense

Running

App Files Community

xiaohy committed 5 days ago

Commit

afed715

verified ·

1 Parent(s): 687782e

Update app.py

Browse files

Files changed (1) hide show

app.py +105 -38

app.py CHANGED Viewed

@@ -1,6 +1,7 @@
 # ================================================================
-# 教育大模型MIA攻防研究 - Gradio演示系统 v7.0 学术巅峰版
-# 彻底消灭普通 e/s，全量启用 LaTeX 原生数学斜体 $\epsilon$ 和 $\sigma$
 # ================================================================
 import os
@@ -31,6 +32,7 @@ def clean_text(text):
     text = re.sub(r'[\u200b-\u200f\u2028-\u202f\u2060-\u206f\ufeff]', '', text)
     return text.strip()
 try:
     member_data = load_json("data/member.json")
     non_member_data = load_json("data/non_member.json")
@@ -57,11 +59,12 @@ except FileNotFoundError:
     for s in [0.005, 0.01, 0.015, 0.02, 0.025, 0.03]:
         k = f"perturbation_{s}"
         perturb_results[k] = {m: v*0.85 for m, v in mia_results["baseline"].items()}
         perturb_results[k]["member_loss_std"] = np.sqrt(0.03**2 + s**2)
         perturb_results[k]["non_member_loss_std"] = np.sqrt(0.03**2 + s**2)
 # ================================================================
-# 全局图表配置
 # ================================================================
 COLORS = {
     'bg': '#FFFFFF',
@@ -78,6 +81,8 @@ COLORS = {
     'ls_colors': ['#A0C4FF', '#70A1FF', '#478EFF', '#007AFF'],
     'op_colors': ['#98F5E1', '#6EE7B7', '#34D399', '#10B981', '#059669', '#047857'],
 }
 CHART_W = 14
 def apply_light_style(fig, ax_or_axes):
@@ -96,15 +101,17 @@ def apply_light_style(fig, ax_or_axes):
         ax.grid(True, color=COLORS['grid'], alpha=0.6, linestyle='-', linewidth=0.8)
         ax.set_axisbelow(True)
-# 🌟🌟🌟 核心修改：专门为画图准备的 LaTeX 格式标签 🌟🌟🌟
 LS_KEYS = ["baseline", "smooth_eps_0.02", "smooth_eps_0.05", "smooth_eps_0.1", "smooth_eps_0.2"]
 LS_LABELS_PLOT = ["Baseline", r"LS($\epsilon$=0.02)", r"LS($\epsilon$=0.05)", r"LS($\epsilon$=0.1)", r"LS($\epsilon$=0.2)"]
-LS_LABELS_UI = ["基线(Baseline)", "LS(ε=0.02)", "LS(ε=0.05)", "LS(ε=0.1)", "LS(ε=0.2)"]
 OP_SIGMAS = [0.005, 0.01, 0.015, 0.02, 0.025, 0.03]
 OP_KEYS = [f"perturbation_{s}" for s in OP_SIGMAS]
-OP_LABELS_PLOT = [f"OP($\sigma$={s})" for s in OP_SIGMAS]
-OP_LABELS_UI = [f"OP(σ={s})" for s in OP_SIGMAS]
 ALL_KEYS = LS_KEYS + OP_KEYS
@@ -123,8 +130,12 @@ bl_acc = gu("baseline")
 bl_m_mean = gm("baseline", "member_loss_mean")
 bl_nm_mean = gm("baseline", "non_member_loss_mean")
-TYPE_CN = {'calculation': '基础计算', 'word_problem': '应用题', 'concept': '概念问答', 'error_correction': '错题订正'}
 np.random.seed(777)
 EVAL_POOL = []
 _types = ['calculation']*120 + ['word_problem']*90 + ['concept']*60 + ['error_correction']*30
@@ -138,7 +149,8 @@ for _i in range(300):
         else: _q,_ans=f"{_a} x {_b} = ?",str(_a*_b)
     elif _t == 'word_problem':
         _a,_b = int(np.random.randint(5,200)), int(np.random.randint(3,50))
-        _tpls = [(f"{_a} apples, ate {_b}, left?",str(_a-_b)), (f"{_a} per group, {_b} groups, total?",str(_a*_b))]
         _q,_ans = _tpls[_i%len(_tpls)]
     elif _t == 'concept':
         _cs = [("area","Area = space occupied by a shape"),("perimeter","Perimeter = total boundary length")]
@@ -153,27 +165,41 @@ for _i in range(300):
     EVAL_POOL.append(item)
 # ================================================================
-# 图表绘制函数 (全部更换为 LaTeX 渲染)
 # ================================================================
 def fig_gauge(loss_val, m_mean, nm_mean, thr, m_std, nm_std):
-    fig, ax = plt.subplots(figsize=(10, 2.6)); fig.patch.set_facecolor(COLORS['bg']); ax.set_facecolor(COLORS['panel'])
-    xlo = min(m_mean - 3.0 * m_std, loss_val - 0.005); xhi = max(nm_mean + 3.0 * nm_std, loss_val + 0.005)
-    ax.axvspan(xlo, thr, alpha=0.2, color=COLORS['accent']); ax.axvspan(thr, xhi, alpha=0.2, color=COLORS['danger'])
     ax.axvline(m_mean, color=COLORS['accent'], lw=2, ls=':', alpha=0.8, zorder=2)
     ax.text(m_mean - 0.002, 1.02, f'Member Mean\n{m_mean:.4f}', ha='right', va='bottom', fontsize=9, color=COLORS['accent'], transform=ax.get_xaxis_transform())
     ax.axvline(nm_mean, color=COLORS['danger'], lw=2, ls=':', alpha=0.8, zorder=2)
     ax.text(nm_mean + 0.002, 1.02, f'Non-Member Mean\n{nm_mean:.4f}', ha='left', va='bottom', fontsize=9, color=COLORS['danger'], transform=ax.get_xaxis_transform())
     ax.axvline(thr, color=COLORS['text_dim'], lw=2.5, ls='--', zorder=3)
     ax.text(thr, 1.25, f'Threshold\n{thr:.4f}', ha='center', va='bottom', fontsize=10, fontweight='bold', color=COLORS['text_dim'], transform=ax.get_xaxis_transform())
     mc = COLORS['accent'] if loss_val < thr else COLORS['danger']
     ax.plot(loss_val, 0.5, marker='o', ms=16, color='white', mec=mc, mew=3, zorder=5, transform=ax.get_xaxis_transform())
     ax.text(loss_val, 0.75, f'Current Loss\n{loss_val:.4f}', ha='center', fontsize=11, fontweight='bold', color=mc, transform=ax.get_xaxis_transform())
     ax.text((xlo+thr)/2, 0.25, 'MEMBER', ha='center', fontsize=12, color=COLORS['accent'], alpha=0.6, fontweight='bold', transform=ax.get_xaxis_transform())
     ax.text((thr+xhi)/2, 0.25, 'NON-MEMBER', ha='center', fontsize=12, color=COLORS['danger'], alpha=0.6, fontweight='bold', transform=ax.get_xaxis_transform())
     ax.set_xlim(xlo, xhi); ax.set_yticks([])
     for s in ax.spines.values(): s.set_visible(False)
-    ax.spines['bottom'].set_visible(True); ax.spines['bottom'].set_color(COLORS['grid']); ax.tick_params(colors=COLORS['text_dim'], width=1)
-    ax.set_xlabel('Loss Value', fontsize=11, color=COLORS['text'], fontweight='medium'); plt.tight_layout(pad=0.5)
     return fig
 def fig_auc_bar():
@@ -195,24 +221,66 @@ def fig_auc_bar():
 def fig_radar():
     ms = ['AUC', 'Atk Acc', 'Prec', 'Recall', 'F1', 'TPR@5%', 'TPR@1%', 'Gap']
-    mk = ['auc', 'attack_accuracy', 'precision', 'recall', 'f1', 'tpr_at_5fpr', 'tpr_at_1fpr', 'loss_gap']
-    N = len(ms); ag = np.linspace(0, 2 * np.pi, N, endpoint=False).tolist() + [0]
-    fig, axes = plt.subplots(1, 2, figsize=(CHART_W + 2, 7), subplot_kw=dict(polar=True)); fig.patch.set_facecolor('white')
-    ls_cfgs = [("Baseline", "baseline", '#F04438'), (r"LS($\epsilon$=0.02)", "smooth_eps_0.02", '#B2DDFF'), (r"LS($\epsilon$=0.05)", "smooth_eps_0.05", '#84CAFF'), (r"LS($\epsilon$=0.1)", "smooth_eps_0.1", '#2E90FA'), (r"LS($\epsilon$=0.2)", "smooth_eps_0.2", '#7A5AF8')]
-    op_cfgs = [("Baseline", "baseline", '#F04438'), (r"OP($\sigma$=0.005)", "perturbation_0.005", '#A6F4C5'), (r"OP($\sigma$=0.01)", "perturbation_0.01", '#6CE9A6'), (r"OP($\sigma$=0.015)", "perturbation_0.015", '#32D583'), (r"OP($\sigma$=0.02)", "perturbation_0.02", '#12B76A'), (r"OP($\sigma$=0.025)", "perturbation_0.025", '#039855'), (r"OP($\sigma$=0.03)", "perturbation_0.03", '#027A48')]
-    for ax_idx, (ax, cfgs, title) in enumerate([(axes[0], ls_cfgs, 'Label Smoothing (5 models)'), (axes[1], op_cfgs, 'Output Perturbation (7 configs)')]):
-        ax.set_facecolor('white')
-        mx = [max(gm(k, m_key) for _, k, _ in cfgs) for m_key in mk]; mx = [m if m > 0 else 1 for m in mx]
         for nm, ky, cl in cfgs:
-            v = [gm(ky, m_key) / mx[i] for i, m_key in enumerate(mk)]; v += [v[0]]
-            ax.plot(ag, v, 'o-', lw=2.8 if ky == 'baseline' else 1.8, label=nm, color=cl, ms=5, alpha=0.95 if ky == 'baseline' else 0.85)
-            ax.fill(ag, v, alpha=0.10 if ky == 'baseline' else 0.04, color=cl)
-        ax.set_xticks(ag[:-1]); ax.set_xticklabels(ms, fontsize=10, color=COLORS['text']); ax.set_yticklabels([])
-        ax.set_title(title, fontsize=12, fontweight='700', color=COLORS['text'], pad=18)
-        ax.legend(loc='upper right', bbox_to_anchor=(1.35 if ax_idx == 1 else 1.30, 1.12), fontsize=9, framealpha=0.9, edgecolor=COLORS['grid'])
-        ax.spines['polar'].set_color(COLORS['grid']); ax.grid(color=COLORS['grid'], alpha=0.5)
     plt.tight_layout()
     return fig
@@ -239,7 +307,7 @@ def fig_perturb_dist():
         ax.hist(mp, bins=bins, alpha=0.6, color=COLORS['accent'], label='Mem+noise', density=True, edgecolor='white')
         ax.hist(np_, bins=bins, alpha=0.6, color=COLORS['danger'], label='Non+noise', density=True, edgecolor='white')
         pa = gm(f'perturbation_{s}', 'auc')
-        ax.set_title(f'OP($\sigma$={s})\nAUC={pa:.4f}', fontsize=11, fontweight='semibold'); ax.set_xlabel('Loss', fontsize=10)
         ax.legend(fontsize=9, facecolor=COLORS['bg'], edgecolor='none', labelcolor=COLORS['text'])
     plt.tight_layout(); return fig
@@ -259,7 +327,7 @@ def fig_roc_curves():
         fpr, tpr, _ = roc_curve(y_true, y_scores); ax.plot(fpr, tpr, color=COLORS['danger'], lw=2.5, label=f'Baseline (AUC={bl_auc:.4f})')
         for i, s in enumerate(OP_SIGMAS):
             rng_m = np.random.RandomState(42); rng_nm = np.random.RandomState(137); mp = ml_base + rng_m.normal(0, s, len(ml_base)); np_ = nl_base + rng_nm.normal(0, s, len(nl_base)); y_scores_p = np.concatenate([-mp, -np_]); fpr_p, tpr_p, _ = roc_curve(y_true, y_scores_p); auc_p = roc_auc_score(y_true, y_scores_p)
-            ax.plot(fpr_p, tpr_p, color=COLORS['op_colors'][i], lw=2, label=f'OP($\sigma$={s}) (AUC={auc_p:.4f})')
     ax.plot([0,1], [0,1], '--', color=COLORS['text_dim'], lw=1.5, label='Random'); ax.set_xlabel('False Positive Rate', fontsize=12, fontweight='medium'); ax.set_ylabel('True Positive Rate', fontsize=12, fontweight='medium'); ax.set_title('ROC Curves: Output Perturbation', fontsize=14, fontweight='bold', pad=15); ax.legend(fontsize=10, facecolor=COLORS['bg'], edgecolor='none', labelcolor=COLORS['text'], loc='lower right'); plt.tight_layout()
     return fig
@@ -452,12 +520,12 @@ def cb_eval(model_choice):
 def build_full_table():
     rows = []
-    for k, l in zip(LS_KEYS, LS_LABELS_UI):
         if k in mia_results:
             m = mia_results[k]; u = gu(k)
             t = "—" if k == "baseline" else "训练期"; d = "" if k == "baseline" else f"{m['auc']-bl_auc:+.4f}"
             rows.append(f"| {l} | {t} | {m['auc']:.4f} | {m['attack_accuracy']:.4f} | {m['precision']:.4f} | {m['recall']:.4f} | {m['f1']:.4f} | {m['tpr_at_5fpr']:.4f} | {m['tpr_at_1fpr']:.4f} | {m['loss_gap']:.4f} | {u:.1f}% | {d} |")
-    for k, l in zip(OP_KEYS, OP_LABELS_UI):
         if k in perturb_results:
             m = perturb_results[k]; d = f"{m['auc']-bl_auc:+.4f}"
             rows.append(f"| {l} | 推理期 | {m['auc']:.4f} | {m['attack_accuracy']:.4f} | {m['precision']:.4f} | {m['recall']:.4f} | {m['f1']:.4f} | {m['tpr_at_5fpr']:.4f} | {m['tpr_at_1fpr']:.4f} | {m['loss_gap']:.4f} | {bl_acc:.1f}% | {d} |")
@@ -539,7 +607,6 @@ with gr.Blocks(title="MIA攻防研究") as demo:
 * 🛡️ **输出扰动 (Output Perturbation, 推理期)**：给 AI 的输出加上“变声器”。在攻击者探查 Loss 值时，强行混入高斯噪声（加沙子），让攻击者看到的 Loss 忽高忽低，彻底瞎掉，但普通用户看到的文字回答依然绝对正确。
 """)
-        # 实验体系总览图 (如果在目录里则显示)
         if os.path.exists(os.path.join(BASE_DIR, "figures", "algo4_overview_cn_final.png")):
             gr.Image(os.path.join(BASE_DIR, "figures", "algo4_overview_cn_final.png"), label="实验体系总览", show_label=True)
@@ -764,7 +831,7 @@ with gr.Blocks(title="MIA攻防研究") as demo:
 ### 结论二：标签平滑是有效的训练期防御
-| 参数 | AUC | AUC降幅 | 效用 | 效用变化 |
 |---|---|---|---|---|
 | ε=0.02 | {gm('smooth_eps_0.02','auc'):.4f} | {bl_auc-gm('smooth_eps_0.02','auc'):.4f} | {gu('smooth_eps_0.02'):.1f}% | {gu('smooth_eps_0.02')-bl_acc:+.1f}% |
 | ε=0.05 | {gm('smooth_eps_0.05','auc'):.4f} | {bl_auc-gm('smooth_eps_0.05','auc'):.4f} | {gu('smooth_eps_0.05'):.1f}% | {gu('smooth_eps_0.05')-bl_acc:+.1f}% |
@@ -775,7 +842,7 @@ with gr.Blocks(title="MIA攻防研究") as demo:
 ### 结论三：输出扰动是有效的推理期防御
-| 参数 | AUC | AUC降幅 | 效用 |
 |---|---|---|---|
 | σ=0.005 | {gm('perturbation_0.005','auc'):.4f} | {bl_auc-gm('perturbation_0.005','auc'):.4f} | {bl_acc:.1f}% |
 | σ=0.01 | {gm('perturbation_0.01','auc'):.4f} | {bl_auc-gm('perturbation_0.01','auc'):.4f} | {bl_acc:.1f}% |
@@ -795,4 +862,4 @@ with gr.Blocks(title="MIA攻防研究") as demo:
 """)
-demo.launch()

 # ================================================================
+# 教育大模型MIA攻防研究 - Gradio演示系统 v6.2 学术巅峰版 (苹果风)
+# 整合了双雷达图 + 算法流程图 + 伪代码 + 详尽数据分析 + 完整结论
+# ！！！全局修正：所有 e 替换为 ε / $\epsilon$，所有 s 替换为 σ / $\sigma$ ！！！
 # ================================================================
 import os
     text = re.sub(r'[\u200b-\u200f\u2028-\u202f\u2060-\u206f\ufeff]', '', text)
     return text.strip()
+# 尝试加载数据，如果不存在则使用虚拟数据以确保运行
 try:
     member_data = load_json("data/member.json")
     non_member_data = load_json("data/non_member.json")
     for s in [0.005, 0.01, 0.015, 0.02, 0.025, 0.03]:
         k = f"perturbation_{s}"
         perturb_results[k] = {m: v*0.85 for m, v in mia_results["baseline"].items()}
+        # 模拟方差变大
         perturb_results[k]["member_loss_std"] = np.sqrt(0.03**2 + s**2)
         perturb_results[k]["non_member_loss_std"] = np.sqrt(0.03**2 + s**2)
 # ================================================================
+# 全局图表配置 - 简约苹果风
 # ================================================================
 COLORS = {
     'bg': '#FFFFFF',
     'ls_colors': ['#A0C4FF', '#70A1FF', '#478EFF', '#007AFF'],
     'op_colors': ['#98F5E1', '#6EE7B7', '#34D399', '#10B981', '#059669', '#047857'],
 }
+# 图表宽度配置 (为了适配双雷达图)
 CHART_W = 14
 def apply_light_style(fig, ax_or_axes):
         ax.grid(True, color=COLORS['grid'], alpha=0.6, linestyle='-', linewidth=0.8)
         ax.set_axisbelow(True)
+# ================================================================
+# 提取指标的辅助函数 (核心替换：使用 LaTeX \epsilon 和 \sigma 画图)
+# ================================================================
 LS_KEYS = ["baseline", "smooth_eps_0.02", "smooth_eps_0.05", "smooth_eps_0.1", "smooth_eps_0.2"]
 LS_LABELS_PLOT = ["Baseline", r"LS($\epsilon$=0.02)", r"LS($\epsilon$=0.05)", r"LS($\epsilon$=0.1)", r"LS($\epsilon$=0.2)"]
+LS_LABELS_MD = ["基线(Baseline)", "LS(ε=0.02)", "LS(ε=0.05)", "LS(ε=0.1)", "LS(ε=0.2)"]
 OP_SIGMAS = [0.005, 0.01, 0.015, 0.02, 0.025, 0.03]
 OP_KEYS = [f"perturbation_{s}" for s in OP_SIGMAS]
+OP_LABELS_PLOT = [r"OP($\sigma$={})".format(s) for s in OP_SIGMAS]
+OP_LABELS_MD = [f"OP(σ={s})" for s in OP_SIGMAS]
 ALL_KEYS = LS_KEYS + OP_KEYS
 bl_m_mean = gm("baseline", "member_loss_mean")
 bl_nm_mean = gm("baseline", "non_member_loss_mean")
+TYPE_CN = {'calculation': '基础计算', 'word_problem': '应用题',
+           'concept': '概念问答', 'error_correction': '错题订正'}
+# ================================================================
+# 效用评估题库
+# ================================================================
 np.random.seed(777)
 EVAL_POOL = []
 _types = ['calculation']*120 + ['word_problem']*90 + ['concept']*60 + ['error_correction']*30
         else: _q,_ans=f"{_a} x {_b} = ?",str(_a*_b)
     elif _t == 'word_problem':
         _a,_b = int(np.random.randint(5,200)), int(np.random.randint(3,50))
+        _tpls = [(f"{_a} apples, ate {_b}, left?",str(_a-_b)),
+                 (f"{_a} per group, {_b} groups, total?",str(_a*_b))]
         _q,_ans = _tpls[_i%len(_tpls)]
     elif _t == 'concept':
         _cs = [("area","Area = space occupied by a shape"),("perimeter","Perimeter = total boundary length")]
     EVAL_POOL.append(item)
 # ================================================================
+# 图表绘制函数 (全面应用 LaTeX 标签渲染)
 # ================================================================
 def fig_gauge(loss_val, m_mean, nm_mean, thr, m_std, nm_std):
+    fig, ax = plt.subplots(figsize=(10, 2.6))
+    fig.patch.set_facecolor(COLORS['bg'])
+    ax.set_facecolor(COLORS['panel'])
+    xlo = min(m_mean - 3.0 * m_std, loss_val - 0.005)
+    xhi = max(nm_mean + 3.0 * nm_std, loss_val + 0.005)
+    ax.axvspan(xlo, thr, alpha=0.2, color=COLORS['accent'])
+    ax.axvspan(thr, xhi, alpha=0.2, color=COLORS['danger'])
     ax.axvline(m_mean, color=COLORS['accent'], lw=2, ls=':', alpha=0.8, zorder=2)
     ax.text(m_mean - 0.002, 1.02, f'Member Mean\n{m_mean:.4f}', ha='right', va='bottom', fontsize=9, color=COLORS['accent'], transform=ax.get_xaxis_transform())
     ax.axvline(nm_mean, color=COLORS['danger'], lw=2, ls=':', alpha=0.8, zorder=2)
     ax.text(nm_mean + 0.002, 1.02, f'Non-Member Mean\n{nm_mean:.4f}', ha='left', va='bottom', fontsize=9, color=COLORS['danger'], transform=ax.get_xaxis_transform())
     ax.axvline(thr, color=COLORS['text_dim'], lw=2.5, ls='--', zorder=3)
     ax.text(thr, 1.25, f'Threshold\n{thr:.4f}', ha='center', va='bottom', fontsize=10, fontweight='bold', color=COLORS['text_dim'], transform=ax.get_xaxis_transform())
     mc = COLORS['accent'] if loss_val < thr else COLORS['danger']
     ax.plot(loss_val, 0.5, marker='o', ms=16, color='white', mec=mc, mew=3, zorder=5, transform=ax.get_xaxis_transform())
     ax.text(loss_val, 0.75, f'Current Loss\n{loss_val:.4f}', ha='center', fontsize=11, fontweight='bold', color=mc, transform=ax.get_xaxis_transform())
     ax.text((xlo+thr)/2, 0.25, 'MEMBER', ha='center', fontsize=12, color=COLORS['accent'], alpha=0.6, fontweight='bold', transform=ax.get_xaxis_transform())
     ax.text((thr+xhi)/2, 0.25, 'NON-MEMBER', ha='center', fontsize=12, color=COLORS['danger'], alpha=0.6, fontweight='bold', transform=ax.get_xaxis_transform())
     ax.set_xlim(xlo, xhi); ax.set_yticks([])
     for s in ax.spines.values(): s.set_visible(False)
+    ax.spines['bottom'].set_visible(True); ax.spines['bottom'].set_color(COLORS['grid'])
+    ax.tick_params(colors=COLORS['text_dim'], width=1)
+    ax.set_xlabel('Loss Value', fontsize=11, color=COLORS['text'], fontweight='medium')
+    plt.tight_layout(pad=0.5)
     return fig
 def fig_auc_bar():
 def fig_radar():
     ms = ['AUC', 'Atk Acc', 'Prec', 'Recall', 'F1', 'TPR@5%', 'TPR@1%', 'Gap']
+    mk = ['auc', 'attack_accuracy', 'precision', 'recall', 'f1',
+          'tpr_at_5fpr', 'tpr_at_1fpr', 'loss_gap']
+    N = len(ms)
+    ag = np.linspace(0, 2 * np.pi, N, endpoint=False).tolist() + [0]
+    fig, axes = plt.subplots(1, 2, figsize=(CHART_W + 2, 7),
+                              subplot_kw=dict(polar=True))
+    fig.patch.set_facecolor('white')
+    # --- 左图: 5个标签平滑模型 (替换LaTeX) ---
+    ls_cfgs = [
+        ("Baseline",             "baseline",         '#F04438'),
+        (r"LS($\epsilon$=0.02)", "smooth_eps_0.02",  '#B2DDFF'),
+        (r"LS($\epsilon$=0.05)", "smooth_eps_0.05",  '#84CAFF'),
+        (r"LS($\epsilon$=0.1)",  "smooth_eps_0.1",   '#2E90FA'),
+        (r"LS($\epsilon$=0.2)",  "smooth_eps_0.2",   '#7A5AF8'),
+    ]
+    # --- 右图: Baseline + 6个输出扰动 (替换LaTeX) ---
+    op_cfgs = [
+        ("Baseline",               "baseline",            '#F04438'),
+        (r"OP($\sigma$=0.005)",    "perturbation_0.005",  '#A6F4C5'),
+        (r"OP($\sigma$=0.01)",     "perturbation_0.01",   '#6CE9A6'),
+        (r"OP($\sigma$=0.015)",    "perturbation_0.015",  '#32D583'),
+        (r"OP($\sigma$=0.02)",     "perturbation_0.02",   '#12B76A'),
+        (r"OP($\sigma$=0.025)",    "perturbation_0.025",  '#039855'),
+        (r"OP($\sigma$=0.03)",     "perturbation_0.03",   '#027A48'),
+    ]
+    for ax_idx, (ax, cfgs, title) in enumerate([
+        (axes[0], ls_cfgs, 'Label Smoothing (5 models)'),
+        (axes[1], op_cfgs, 'Output Perturbation (7 configs)')
+    ]):
+        ax.set_facecolor('white')
+        mx = []
+        for i, m_key in enumerate(mk):
+            val_max = max(gm(k, m_key) for _, k, _ in cfgs)
+            mx.append(val_max if val_max > 0 else 1)
         for nm, ky, cl in cfgs:
+            v = [gm(ky, m_key) / mx[i] for i, m_key in enumerate(mk)]
+            v += [v[0]]  # 闭合
+            lw = 2.8 if ky == 'baseline' else 1.8
+            alpha_fill = 0.10 if ky == 'baseline' else 0.04
+            ax.plot(ag, v, 'o-', lw=lw, label=nm, color=cl, ms=5,
+                    alpha=0.95 if ky == 'baseline' else 0.85)
+            ax.fill(ag, v, alpha=alpha_fill, color=cl)
+        ax.set_xticks(ag[:-1])
+        ax.set_xticklabels(ms, fontsize=9, color=COLORS['text'])
+        ax.set_yticklabels([])
+        ax.set_title(title, fontsize=11, fontweight='700',
+                     color=COLORS['text'], pad=18)
+        ax.legend(loc='upper right',
+                  bbox_to_anchor=(1.35 if ax_idx == 1 else 1.30, 1.12),
+                  fontsize=8, framealpha=0.9, edgecolor=COLORS['grid'])
+        ax.spines['polar'].set_color(COLORS['grid'])
+        ax.grid(color=COLORS['grid'], alpha=0.5)
     plt.tight_layout()
     return fig
         ax.hist(mp, bins=bins, alpha=0.6, color=COLORS['accent'], label='Mem+noise', density=True, edgecolor='white')
         ax.hist(np_, bins=bins, alpha=0.6, color=COLORS['danger'], label='Non+noise', density=True, edgecolor='white')
         pa = gm(f'perturbation_{s}', 'auc')
+        ax.set_title(r'OP($\sigma$={})'.format(s) + f'\nAUC={pa:.4f}', fontsize=11, fontweight='semibold'); ax.set_xlabel('Loss', fontsize=10)
         ax.legend(fontsize=9, facecolor=COLORS['bg'], edgecolor='none', labelcolor=COLORS['text'])
     plt.tight_layout(); return fig
         fpr, tpr, _ = roc_curve(y_true, y_scores); ax.plot(fpr, tpr, color=COLORS['danger'], lw=2.5, label=f'Baseline (AUC={bl_auc:.4f})')
         for i, s in enumerate(OP_SIGMAS):
             rng_m = np.random.RandomState(42); rng_nm = np.random.RandomState(137); mp = ml_base + rng_m.normal(0, s, len(ml_base)); np_ = nl_base + rng_nm.normal(0, s, len(nl_base)); y_scores_p = np.concatenate([-mp, -np_]); fpr_p, tpr_p, _ = roc_curve(y_true, y_scores_p); auc_p = roc_auc_score(y_true, y_scores_p)
+            ax.plot(fpr_p, tpr_p, color=COLORS['op_colors'][i], lw=2, label=r'OP($\sigma$={}) (AUC={:.4f})'.format(s, auc_p))
     ax.plot([0,1], [0,1], '--', color=COLORS['text_dim'], lw=1.5, label='Random'); ax.set_xlabel('False Positive Rate', fontsize=12, fontweight='medium'); ax.set_ylabel('True Positive Rate', fontsize=12, fontweight='medium'); ax.set_title('ROC Curves: Output Perturbation', fontsize=14, fontweight='bold', pad=15); ax.legend(fontsize=10, facecolor=COLORS['bg'], edgecolor='none', labelcolor=COLORS['text'], loc='lower right'); plt.tight_layout()
     return fig
 def build_full_table():
     rows = []
+    for k, l in zip(LS_KEYS, LS_LABELS_MD):
         if k in mia_results:
             m = mia_results[k]; u = gu(k)
             t = "—" if k == "baseline" else "训练期"; d = "" if k == "baseline" else f"{m['auc']-bl_auc:+.4f}"
             rows.append(f"| {l} | {t} | {m['auc']:.4f} | {m['attack_accuracy']:.4f} | {m['precision']:.4f} | {m['recall']:.4f} | {m['f1']:.4f} | {m['tpr_at_5fpr']:.4f} | {m['tpr_at_1fpr']:.4f} | {m['loss_gap']:.4f} | {u:.1f}% | {d} |")
+    for k, l in zip(OP_KEYS, OP_LABELS_MD):
         if k in perturb_results:
             m = perturb_results[k]; d = f"{m['auc']-bl_auc:+.4f}"
             rows.append(f"| {l} | 推理期 | {m['auc']:.4f} | {m['attack_accuracy']:.4f} | {m['precision']:.4f} | {m['recall']:.4f} | {m['f1']:.4f} | {m['tpr_at_5fpr']:.4f} | {m['tpr_at_1fpr']:.4f} | {m['loss_gap']:.4f} | {bl_acc:.1f}% | {d} |")
 * 🛡️ **输出扰动 (Output Perturbation, 推理期)**：给 AI 的输出加上“变声器”。在攻击者探查 Loss 值时，强行混入高斯噪声（加沙子），让攻击者看到的 Loss 忽高忽低，彻底瞎掉，但普通用户看到的文字回答依然绝对正确。
 """)
         if os.path.exists(os.path.join(BASE_DIR, "figures", "algo4_overview_cn_final.png")):
             gr.Image(os.path.join(BASE_DIR, "figures", "algo4_overview_cn_final.png"), label="实验体系总览", show_label=True)
 ### 结论二：标签平滑是有效的训练期防御
+| ε 参数 | AUC | AUC降幅 | 效用 | 效用变化 |
 |---|---|---|---|---|
 | ε=0.02 | {gm('smooth_eps_0.02','auc'):.4f} | {bl_auc-gm('smooth_eps_0.02','auc'):.4f} | {gu('smooth_eps_0.02'):.1f}% | {gu('smooth_eps_0.02')-bl_acc:+.1f}% |
 | ε=0.05 | {gm('smooth_eps_0.05','auc'):.4f} | {bl_auc-gm('smooth_eps_0.05','auc'):.4f} | {gu('smooth_eps_0.05'):.1f}% | {gu('smooth_eps_0.05')-bl_acc:+.1f}% |
 ### 结论三：输出扰动是有效的推理期防御
+| σ 参数 | AUC | AUC降幅 | 效用 |
 |---|---|---|---|
 | σ=0.005 | {gm('perturbation_0.005','auc'):.4f} | {bl_auc-gm('perturbation_0.005','auc'):.4f} | {bl_acc:.1f}% |
 | σ=0.01 | {gm('perturbation_0.01','auc'):.4f} | {bl_auc-gm('perturbation_0.01','auc'):.4f} | {bl_acc:.1f}% |
 """)
+demo.launch(theme=gr.themes.Soft(), css=CSS)