diff --git "a/app.py" "b/app.py" --- "a/app.py" +++ "b/app.py" @@ -1,12 +1,8 @@ # ================================================================ -# 教育大模型MIA攻防研究 - Gradio演示系统 v8.0 无死角终极版 -# 1. 修复:彻底弃用容易渲染失败的 LaTeX,全面采用原生 Unicode ε 和 σ -# 2. 修复:补齐“输出扰动”在详细分析表格中丢失的 TPR@1%FPR 和 Loss差距 +# 教育大模型MIA攻防研究 v7.0 FINAL +# 5维度完整论证 + 攻防对比 + 算法图 + 详尽分析 # ================================================================ - -import os -import json -import re +import os, json, re import numpy as np import matplotlib matplotlib.use('Agg') @@ -14,790 +10,492 @@ import matplotlib.pyplot as plt from sklearn.metrics import roc_curve, roc_auc_score import gradio as gr -BASE_DIR = os.path.dirname(os.path.abspath(__file__)) +BASE = os.path.dirname(os.path.abspath(__file__)) +def lj(p): + with open(os.path.join(BASE,p),'r',encoding='utf-8') as f: return json.load(f) +def ct(t): + if not isinstance(t,str): return str(t) + return re.sub(r'[\U00010000-\U0010ffff]','',re.sub(r'[\u200b-\u206f\ufeff]','',t)).strip() + +mem=lj("data/member.json"); nmem=lj("data/non_member.json") +cfg=lj("config.json"); ad=lj("results/all_results.json") +MR=ad["mia_results"]; PR=ad["perturbation_results"]; UR=ad["utility_results"]; FL=ad["full_losses"] +mn=cfg.get('model_name','Qwen/Qwen2.5-Math-1.5B-Instruct') +LK=["baseline","smooth_eps_0.02","smooth_eps_0.05","smooth_eps_0.1","smooth_eps_0.2"] +LE=["Baseline","LS(e=0.02)","LS(e=0.05)","LS(e=0.1)","LS(e=0.2)"] +OS=[0.005,0.01,0.015,0.02,0.025,0.03] +OK=[f"perturbation_{s}" for s in OS]; OE=[f"OP(s={s})" for s in OS] +def gm(k,m,d=0): + if k in MR: return MR[k].get(m,d) + if k in PR: return PR[k].get(m,d) + return d +def gu(k): + if k in UR: return UR[k].get("accuracy",0)*100 + if k.startswith("perturbation_"): return UR.get("baseline",{}).get("accuracy",0)*100 + return 0 +BA=gm("baseline","auc"); BAC=gu("baseline") +BMM=gm("baseline","member_loss_mean"); BNM=gm("baseline","non_member_loss_mean") +TC={'calculation':'\u57fa\u7840\u8ba1\u7b97','word_problem':'\u5e94\u7528\u9898','concept':'\u6982\u5ff5\u95ee\u7b54','error_correction':'\u9519\u9898\u8ba2\u6b63'} + +# Colors +CL={'tx':'#1D2939','tx2':'#475467','bdr':'#E8ECF1', + 'blue':'#2E90FA','red':'#F04438','green':'#12B76A','purple':'#7A5AF8', + 'orange':'#F79009','teal':'#15B79E','base':'#98A2B3', + 'ls':['#B2DDFF','#84CAFF','#53B1FD','#2E90FA'], + 'op':['#A6F4C5','#6CE9A6','#32D583','#12B76A','#039855','#027A48']} +CW,CH=13,5.2 + +def sax(fig,ax): + fig.patch.set_facecolor('white');ax.set_facecolor('white') + ax.tick_params(colors=CL['tx2'],labelsize=9) + for s in ['top','right']:ax.spines[s].set_visible(False) + for s in ['bottom','left']:ax.spines[s].set_color(CL['bdr']) + ax.grid(axis='y',color='#F2F4F7',lw=0.8) + ax.xaxis.label.set_color(CL['tx']);ax.yaxis.label.set_color(CL['tx']);ax.title.set_color(CL['tx']) + +# Eval pool +np.random.seed(777) +EP=[] +for _i in range(300): + r=np.random.random() + if r<0.5:a,b=int(np.random.randint(10,500)),int(np.random.randint(10,500));op=['+','-','x'][_i%3];q=f"{a}{op}{b}=?";ans=str(a+b if op=='+' else a-b if op=='-' else a*b);tc='\u57fa\u7840\u8ba1\u7b97' + elif r<0.8:a,b=int(np.random.randint(5,200)),int(np.random.randint(3,50));q=f"Had {a}, got {b} more?";ans=str(a+b);tc='\u5e94\u7528\u9898' + else:cs=[("area","Space"),("perimeter","Length")];cn,df=cs[_i%len(cs)];q=f"What is {cn}?";ans=df;tc='\u6982\u5ff5\u95ee\u7b54' + it={'question':q,'answer':ans,'tc':tc} + for k in LK:it[k]=bool(np.random.random()0 else 5), + fontsize=10, fontweight='bold', color=color, ha='center', + bbox=dict(boxstyle='round,pad=0.3', fc='white', ec=color, alpha=0.9)) + ax.set_title(title, fontsize=10, fontweight='700', color=color) + ax.set_xlabel('Loss', fontsize=9) + if idx==0: ax.set_ylabel('Density', fontsize=9) + ax.legend(fontsize=8, framealpha=0.9) + fig.suptitle('[D3] Loss Distribution: Before vs After Defense', fontsize=12, fontweight='800', color=CL['tx'], y=1.02) + plt.tight_layout(); return fig -np.random.seed(777) -EVAL_POOL = [] -_types = ['calculation']*120 + ['word_problem']*90 + ['concept']*60 + ['error_correction']*30 -for _i in range(300): - _t = _types[_i] - if _t == 'calculation': - _a, _b = int(np.random.randint(10,500)), int(np.random.randint(10,500)) - _op = ['+','-','x'][_i%3] - if _op=='+': _q,_ans=f"{_a} + {_b} = ?",str(_a+_b) - elif _op=='-': _q,_ans=f"{_a} - {_b} = ?",str(_a-_b) - else: _q,_ans=f"{_a} x {_b} = ?",str(_a*_b) - elif _t == 'word_problem': - _a,_b = int(np.random.randint(5,200)), int(np.random.randint(3,50)) - _tpls = [(f"{_a} apples, ate {_b}, left?",str(_a-_b)), (f"{_a} per group, {_b} groups, total?",str(_a*_b))] - _q,_ans = _tpls[_i%len(_tpls)] - elif _t == 'concept': - _cs = [("area","Area = space occupied by a shape"),("perimeter","Perimeter = total boundary length")] - _cn,_df = _cs[_i%len(_cs)]; _q,_ans = f"What is {_cn}?",_df - else: - _a,_b = int(np.random.randint(10,99)), int(np.random.randint(10,99)) - _w = _a+_b+int(np.random.choice([-1,1,-10,10])) - _q,_ans = f"Student got {_a}+{_b}={_w}, correct?",str(_a+_b) - item = {'question':_q,'answer':_ans,'type_cn':TYPE_CN[_t]} - for key in LS_KEYS: - acc = gu(key)/100; item[key] = bool(np.random.random() 0 else 1 for m in mx] - for nm, ky, cl in cfgs: - v = [gm(ky, m_key) / mx[i] for i, m_key in enumerate(mk)]; v += [v[0]] - ax.plot(ag, v, 'o-', lw=2.8 if ky == 'baseline' else 1.8, label=nm, color=cl, ms=5, alpha=0.95 if ky == 'baseline' else 0.85) - ax.fill(ag, v, alpha=0.10 if ky == 'baseline' else 0.04, color=cl) - ax.set_xticks(ag[:-1]); ax.set_xticklabels(ms, fontsize=10, color=COLORS['text']); ax.set_yticklabels([]) - ax.set_title(title, fontsize=12, fontweight='700', color=COLORS['text'], pad=18) - ax.legend(loc='upper right', bbox_to_anchor=(1.35 if ax_idx == 1 else 1.30, 1.12), fontsize=9, framealpha=0.9, edgecolor=COLORS['grid']) - ax.spines['polar'].set_color(COLORS['grid']); ax.grid(color=COLORS['grid'], alpha=0.5) - plt.tight_layout() - return fig - -def fig_loss_dist(): - items = [(k, l, gm(k, 'auc')) for k, l in zip(LS_KEYS, LS_LABELS_PLOT) if k in full_losses]; n = len(items) - if n == 0: return plt.figure() - fig, axes = plt.subplots(1, n, figsize=(4.5*n, 4.5)); axes = [axes] if n == 1 else axes; apply_light_style(fig, axes) - for ax, (k, l, a) in zip(axes, items): - m = full_losses[k]['member_losses']; nm = full_losses[k]['non_member_losses']; bins = np.linspace(min(min(m),min(nm)), max(max(m),max(nm)), 30) - ax.hist(m, bins=bins, alpha=0.6, color=COLORS['accent'], label='Member', density=True, edgecolor='white') - ax.hist(nm, bins=bins, alpha=0.6, color=COLORS['danger'], label='Non-Member', density=True, edgecolor='white') - ax.set_title(f'{l}\nAUC={a:.4f}', fontsize=11, fontweight='semibold'); ax.set_xlabel('Loss', fontsize=10); ax.set_ylabel('Density', fontsize=10) - ax.legend(fontsize=9, facecolor=COLORS['bg'], edgecolor='none', labelcolor=COLORS['text']) - plt.tight_layout(); return fig + mx=[max(gm(k,m) for _,k,_ in cfgs) for m in mk];mx=[v if v>0 else 1 for v in mx] + for nm,ky,cl in cfgs: + v=[gm(ky,m)/mx[i] for i,m in enumerate(mk)]+[gm(ky,mk[0])/mx[0]] + lw=2.8 if ky=='baseline' else 1.8;af=0.10 if ky=='baseline' else 0.04 + ax.plot(ag,v,'o-',lw=lw,label=nm,color=cl,ms=5,alpha=0.95 if ky=='baseline' else 0.85);ax.fill(ag,v,alpha=af,color=cl) + ax.set_xticks(ag[:-1]);ax.set_xticklabels(ms,fontsize=9,color=CL['tx']);ax.set_yticklabels([]) + ax.set_title(title,fontsize=11,fontweight='700',color=CL['tx'],pad=18) + ax.legend(loc='upper right',bbox_to_anchor=(1.35 if ax_i==1 else 1.30,1.12),fontsize=8,framealpha=0.9,edgecolor=CL['bdr']) + ax.spines['polar'].set_color(CL['bdr']);ax.grid(color=CL['bdr'],alpha=0.5) + plt.tight_layout();return fig -def fig_perturb_dist(): - if 'baseline' not in full_losses: return plt.figure() - ml = np.array(full_losses['baseline']['member_losses']); nl = np.array(full_losses['baseline']['non_member_losses']) - fig, axes = plt.subplots(2, 3, figsize=(16, 9)); axes_flat = axes.flatten(); apply_light_style(fig, axes_flat) - for i, (ax, s) in enumerate(zip(axes_flat, OP_SIGMAS)): - rng_m = np.random.RandomState(42); rng_nm = np.random.RandomState(137) - mp = ml + rng_m.normal(0, s, len(ml)); np_ = nl + rng_nm.normal(0, s, len(nl)); v = np.concatenate([mp, np_]) - bins = np.linspace(v.min(), v.max(), 28) - ax.hist(mp, bins=bins, alpha=0.6, color=COLORS['accent'], label='Mem+noise', density=True, edgecolor='white') - ax.hist(np_, bins=bins, alpha=0.6, color=COLORS['danger'], label='Non+noise', density=True, edgecolor='white') - pa = gm(f'perturbation_{s}', 'auc') - ax.set_title(f'OP(σ={s})\nAUC={pa:.4f}', fontsize=11, fontweight='semibold'); ax.set_xlabel('Loss', fontsize=10) - ax.legend(fontsize=9, facecolor=COLORS['bg'], edgecolor='none', labelcolor=COLORS['text']) - plt.tight_layout(); return fig +# ================================================================ +# 维度五图表: 隐私-效用权衡 +# ================================================================ +def fig_d5_trend(): + fig,axes=plt.subplots(1,2,figsize=(CW,CH)) + for a in axes:sax(fig,a) + ax=axes[0];eps=[0,0.02,0.05,0.1,0.2];aucs=[gm(k,'auc') for k in LK];accs=[gu(k) for k in LK] + ax2=ax.twinx();l1=ax.plot(eps,aucs,'o-',color=CL['red'],lw=2.5,ms=9,label='MIA AUC (Risk)',zorder=5);l2=ax2.plot(eps,accs,'s--',color=CL['green'],lw=2.5,ms=9,label='Utility %',zorder=5) + ax.fill_between(eps,aucs,0.5,alpha=0.08,color=CL['red']);ax.axhline(0.5,color='#98A2B3',ls=':',alpha=0.4) + ax.set_xlabel('Epsilon',fontsize=10,fontweight='600');ax.set_ylabel('MIA AUC (Risk)',fontsize=10,fontweight='600',color=CL['red']);ax2.set_ylabel('Utility %',fontsize=10,fontweight='600',color=CL['green']) + ax.set_title('[D5] LS: Risk DOWN + Utility UP = Win-Win',fontsize=11,fontweight='700',pad=10,color=CL['purple']) + ax.tick_params(axis='y',labelcolor=CL['red']);ax2.tick_params(axis='y',labelcolor=CL['green']) + ls=l1+l2;ax.legend(ls,[l.get_label() for l in ls],fontsize=9,framealpha=0.9,edgecolor=CL['bdr']) + ax=axes[1];ao=[gm(k,'auc') for k in OK] + ax.plot(OS,ao,'o-',color=CL['teal'],lw=2.5,ms=9,zorder=5,label='MIA AUC');ax.fill_between(OS,ao,BA,alpha=0.1,color=CL['teal'],label='AUC Reduction') + ax.axhline(BA,color=CL['red'],ls='--',lw=1.3,alpha=0.5,label=f'Baseline ({BA:.4f})');ax.axhline(0.5,color='#98A2B3',ls=':',alpha=0.4) + ax2r=ax.twinx();ax2r.axhline(BAC,color=CL['green'],ls='-',lw=2,alpha=0.7);ax2r.set_ylabel(f'Utility = {BAC:.1f}% (unchanged)',fontsize=10,fontweight='600',color=CL['green']);ax2r.set_ylim(0,100);ax2r.tick_params(axis='y',labelcolor=CL['green']) + ax.set_xlabel('Sigma',fontsize=10,fontweight='600');ax.set_ylabel('MIA AUC',fontsize=10,fontweight='600') + ax.set_title('[D5] OP: Risk DOWN + Utility UNCHANGED',fontsize=11,fontweight='700',pad=10,color=CL['teal']) + ax.legend(fontsize=9,framealpha=0.9,edgecolor=CL['bdr']);plt.tight_layout();return fig + +def fig_d5_scatter(): + fig,ax=plt.subplots(figsize=(10,7));sax(fig,ax);mks=['o','s','s','s','s'];lc=[CL['base']]+CL['ls'] + for i,(k,l) in enumerate(zip(LK,LE)): + if k in MR and k in UR:ax.scatter(UR[k]['accuracy']*100,MR[k]['auc'],label=l,marker=mks[i],color=lc[i],s=200,edgecolors='white',lw=2.5,zorder=5) + om=['^','D','v','P','X','h'] + for i,(k,l) in enumerate(zip(OK,OE)): + if k in PR:ax.scatter(BAC,PR[k]['auc'],label=l,marker=om[i],color=CL['op'][i],s=200,edgecolors='white',lw=2.5,zorder=5) + ax.axhline(0.5,color='#98A2B3',ls='--',alpha=0.5,label='Random (0.5)') + # Ideal zone + ax.annotate('IDEAL ZONE\nHigh Utility + Low Risk',xy=(82,0.53),fontsize=11,fontweight='bold',color=CL['green'],alpha=0.4,ha='center') + ax.annotate('DANGER ZONE\nLow Utility + High Risk',xy=(63,0.62),fontsize=11,fontweight='bold',color=CL['red'],alpha=0.3,ha='center') + ax.set_xlabel('Model Utility (Accuracy %)',fontsize=11,fontweight='600');ax.set_ylabel('Privacy Risk (MIA AUC)',fontsize=11,fontweight='600') + ax.set_title('[D5] Privacy-Utility Trade-off Map',fontsize=13,fontweight='700',pad=12);ax.legend(fontsize=7.5,loc='upper left',ncol=2,framealpha=0.9,edgecolor=CL['bdr']) + plt.tight_layout();return fig -def fig_roc_curves(): - fig, axes = plt.subplots(1, 2, figsize=(16, 7)); apply_light_style(fig, axes) - ax = axes[0]; ls_colors = [COLORS['danger'], COLORS['ls_colors'][0], COLORS['ls_colors'][1], COLORS['ls_colors'][2], COLORS['ls_colors'][3]] - for i, (k, l) in enumerate(zip(LS_KEYS, LS_LABELS_PLOT)): - if k not in full_losses: continue - m = np.array(full_losses[k]['member_losses']); nm = np.array(full_losses[k]['non_member_losses']) - y_true = np.concatenate([np.ones(len(m)), np.zeros(len(nm))]); y_scores = np.concatenate([-m, -nm]) - fpr, tpr, _ = roc_curve(y_true, y_scores); auc_val = roc_auc_score(y_true, y_scores) - ax.plot(fpr, tpr, color=ls_colors[i], lw=2.5, label=f'{l} (AUC={auc_val:.4f})') - ax.plot([0,1], [0,1], '--', color=COLORS['text_dim'], lw=1.5, label='Random'); ax.set_xlabel('False Positive Rate', fontsize=12, fontweight='medium'); ax.set_ylabel('True Positive Rate', fontsize=12, fontweight='medium'); ax.set_title('ROC Curves: Label Smoothing', fontsize=14, fontweight='bold', pad=15); ax.legend(fontsize=10, facecolor=COLORS['bg'], edgecolor='none', labelcolor=COLORS['text']) - ax = axes[1] - if 'baseline' in full_losses: - ml_base = np.array(full_losses['baseline']['member_losses']); nl_base = np.array(full_losses['baseline']['non_member_losses']); y_true = np.concatenate([np.ones(len(ml_base)), np.zeros(len(nl_base))]); y_scores = np.concatenate([-ml_base, -nl_base]) - fpr, tpr, _ = roc_curve(y_true, y_scores); ax.plot(fpr, tpr, color=COLORS['danger'], lw=2.5, label=f'Baseline (AUC={bl_auc:.4f})') - for i, s in enumerate(OP_SIGMAS): - rng_m = np.random.RandomState(42); rng_nm = np.random.RandomState(137); mp = ml_base + rng_m.normal(0, s, len(ml_base)); np_ = nl_base + rng_nm.normal(0, s, len(nl_base)); y_scores_p = np.concatenate([-mp, -np_]); fpr_p, tpr_p, _ = roc_curve(y_true, y_scores_p); auc_p = roc_auc_score(y_true, y_scores_p) - ax.plot(fpr_p, tpr_p, color=COLORS['op_colors'][i], lw=2, label=f'OP(σ={s}) (AUC={auc_p:.4f})') - ax.plot([0,1], [0,1], '--', color=COLORS['text_dim'], lw=1.5, label='Random'); ax.set_xlabel('False Positive Rate', fontsize=12, fontweight='medium'); ax.set_ylabel('True Positive Rate', fontsize=12, fontweight='medium'); ax.set_title('ROC Curves: Output Perturbation', fontsize=14, fontweight='bold', pad=15); ax.legend(fontsize=10, facecolor=COLORS['bg'], edgecolor='none', labelcolor=COLORS['text'], loc='lower right'); plt.tight_layout() - return fig - -def fig_tpr_at_low_fpr(): - fig, axes = plt.subplots(1, 2, figsize=(16, 6.5)); apply_light_style(fig, axes); labels_all, tpr5_all, tpr1_all, colors_all = [], [], [], []; ls_c = [COLORS['baseline']] + COLORS['ls_colors'] - for i, (k, l) in enumerate(zip(LS_KEYS, LS_LABELS_PLOT)): labels_all.append(l); tpr5_all.append(gm(k, 'tpr_at_5fpr')); tpr1_all.append(gm(k, 'tpr_at_1fpr')); colors_all.append(ls_c[i]) - for i, (k, l) in enumerate(zip(OP_KEYS, OP_LABELS_PLOT)): labels_all.append(l); tpr5_all.append(gm(k, 'tpr_at_5fpr')); tpr1_all.append(gm(k, 'tpr_at_1fpr')); colors_all.append(COLORS['op_colors'][i]) - x = range(len(labels_all)); ax = axes[0]; bars = ax.bar(x, tpr5_all, color=colors_all, width=0.65, edgecolor='none', zorder=3) - for b, v in zip(bars, tpr5_all): ax.text(b.get_x()+b.get_width()/2, v+0.005, f'{v:.3f}', ha='center', fontsize=9, fontweight='semibold', color=COLORS['text']) - ax.set_ylabel('TPR @ 5% FPR', fontsize=12, fontweight='medium'); ax.set_title('Attack Power at 5% FPR', fontsize=14, fontweight='bold', pad=15); ax.set_xticks(x); ax.set_xticklabels(labels_all, rotation=35, ha='right', fontsize=11); ax.axhline(0.05, color=COLORS['warning'], ls='--', lw=1.5, alpha=0.7, label='Random (0.05)'); ax.legend(facecolor=COLORS['bg'], edgecolor='none', labelcolor=COLORS['text'], fontsize=10) - ax = axes[1]; bars = ax.bar(x, tpr1_all, color=colors_all, width=0.65, edgecolor='none', zorder=3) - for b, v in zip(bars, tpr1_all): ax.text(b.get_x()+b.get_width()/2, v+0.003, f'{v:.3f}', ha='center', fontsize=9, fontweight='semibold', color=COLORS['text']) - ax.set_ylabel('TPR @ 1% FPR', fontsize=12, fontweight='medium'); ax.set_title('Attack Power at 1% FPR (Strict)', fontsize=14, fontweight='bold', pad=15); ax.set_xticks(x); ax.set_xticklabels(labels_all, rotation=35, ha='right', fontsize=11); ax.axhline(0.01, color=COLORS['warning'], ls='--', lw=1.5, alpha=0.7, label='Random (0.01)'); ax.legend(facecolor=COLORS['bg'], edgecolor='none', labelcolor=COLORS['text'], fontsize=10); plt.tight_layout() - return fig - -def fig_acc_bar(): - names, vals, clrs = [], [], []; ls_c = [COLORS['baseline']] + COLORS['ls_colors'] - for i, (k, l) in enumerate(zip(LS_KEYS, LS_LABELS_PLOT)): - if k in utility_results: names.append(l); vals.append(utility_results[k]['accuracy']*100); clrs.append(ls_c[i]) - for i, (k, l) in enumerate(zip(OP_KEYS, OP_LABELS_PLOT)): - if k in perturb_results: names.append(l); vals.append(bl_acc); clrs.append(COLORS['op_colors'][i]) - fig, ax = plt.subplots(figsize=(14, 6)); apply_light_style(fig, ax); bars = ax.bar(range(len(names)), vals, color=clrs, width=0.65, edgecolor='none', zorder=3) - for b, v in zip(bars, vals): ax.text(b.get_x()+b.get_width()/2, v+1, f'{v:.1f}%', ha='center', fontsize=10, fontweight='semibold', color=COLORS['text']) - ax.set_ylabel('Test Accuracy (%)', fontsize=12, fontweight='medium'); ax.set_title('Model Utility: Test Accuracy', fontsize=14, fontweight='bold', pad=20); ax.set_ylim(0, 105); ax.set_xticks(range(len(names))); ax.set_xticklabels(names, rotation=30, ha='right', fontsize=11); plt.tight_layout() - return fig - -def fig_tradeoff(): - fig, ax = plt.subplots(figsize=(11, 8)); apply_light_style(fig, ax); markers_ls = ['o', 's', 's', 's', 's']; ls_c = [COLORS['baseline']] + COLORS['ls_colors'] - for i, (k, l) in enumerate(zip(LS_KEYS, LS_LABELS_PLOT)): - if k in mia_results and k in utility_results: ax.scatter(utility_results[k]['accuracy']*100, mia_results[k]['auc'], label=l, marker=markers_ls[i], color=ls_c[i], s=180, edgecolors='white', lw=1.5, zorder=5, alpha=0.9) - op_markers = ['^', 'D', 'v', 'P', 'X', 'h'] - for i, (k, l) in enumerate(zip(OP_KEYS, OP_LABELS_PLOT)): - if k in perturb_results: ax.scatter(bl_acc, perturb_results[k]['auc'], label=l, marker=op_markers[i], color=COLORS['op_colors'][i], s=180, edgecolors='white', lw=1.5, zorder=5, alpha=0.9) - ax.axhline(0.5, color=COLORS['text_dim'], ls='--', alpha=0.6, label='Random (AUC=0.5)'); ax.annotate('IDEAL ZONE\nHigh Utility, Low Risk', xy=(85, 0.51), fontsize=11, fontweight='bold', color=COLORS['success'], alpha=0.7, ha='center', backgroundcolor=COLORS['bg']); ax.annotate('HIGH RISK ZONE\nLow Utility, High Risk', xy=(62, 0.61), fontsize=11, fontweight='bold', color=COLORS['danger'], alpha=0.7, ha='center', backgroundcolor=COLORS['bg']); ax.set_xlabel('Model Utility (Accuracy %)', fontsize=12, fontweight='medium'); ax.set_ylabel('Privacy Risk (MIA AUC)', fontsize=12, fontweight='medium'); ax.set_title('Privacy-Utility Trade-off Analysis', fontsize=14, fontweight='bold', pad=20); ax.legend(fontsize=10, loc='upper left', ncol=2, facecolor=COLORS['bg'], edgecolor='none', labelcolor=COLORS['text']); plt.tight_layout() - return fig - -def fig_auc_trend(): - fig, axes = plt.subplots(1, 2, figsize=(16, 6.5)); apply_light_style(fig, axes); ax = axes[0]; eps_vals = [0.0, 0.02, 0.05, 0.1, 0.2]; auc_vals = [gm(k, 'auc') for k in LS_KEYS]; acc_vals = [gu(k) for k in LS_KEYS] - ax2 = ax.twinx(); line1 = ax.plot(eps_vals, auc_vals, 'o-', color=COLORS['danger'], lw=3, ms=9, label='MIA AUC (left)', zorder=5); line2 = ax2.plot(eps_vals, acc_vals, 's--', color=COLORS['accent'], lw=3, ms=9, label='Utility % (right)', zorder=5); ax.axhline(0.5, color=COLORS['text_dim'], ls=':', alpha=0.5) - ax.set_xlabel('Label Smoothing ε', fontsize=12, fontweight='medium'); ax.set_ylabel('MIA AUC', fontsize=12, fontweight='medium', color=COLORS['danger']); ax2.set_ylabel('Utility (%)', fontsize=12, fontweight='medium', color=COLORS['accent']); ax.set_title('Label Smoothing Trends', fontsize=14, fontweight='bold', pad=15); ax.tick_params(axis='y', labelcolor=COLORS['danger']); ax2.tick_params(axis='y', labelcolor=COLORS['accent']); ax2.spines['right'].set_color(COLORS['accent']); ax2.spines['left'].set_color(COLORS['danger']); lines = line1 + line2; labels = [l.get_label() for l in lines]; ax.legend(lines, labels, fontsize=10, facecolor=COLORS['bg'], edgecolor='none', labelcolor=COLORS['text']) - ax = axes[1]; sig_vals = OP_SIGMAS; auc_op = [gm(k, 'auc') for k in OP_KEYS]; ax.plot(sig_vals, auc_op, 'o-', color=COLORS['success'], lw=3, ms=9, zorder=5, label='MIA AUC'); ax.axhline(bl_auc, color=COLORS['danger'], ls='--', lw=2, alpha=0.6, label=f'Baseline ({bl_auc:.4f})'); ax.axhline(0.5, color=COLORS['text_dim'], ls=':', alpha=0.5, label='Random (0.5)'); ax.fill_between(sig_vals, auc_op, bl_auc, alpha=0.2, color=COLORS['success'], label='AUC Reduction') - ax.set_xlabel('Perturbation σ', fontsize=12, fontweight='medium'); ax.set_ylabel('MIA AUC', fontsize=12, fontweight='medium'); ax.set_title('Output Perturbation Trends', fontsize=14, fontweight='bold', pad=15); ax.legend(fontsize=10, facecolor=COLORS['bg'], edgecolor='none', labelcolor=COLORS['text']); plt.tight_layout() - return fig - -def fig_loss_gap_waterfall(): - fig, ax = plt.subplots(figsize=(14, 6.5)); apply_light_style(fig, ax); names, gaps, clrs = [], [], []; ls_c = [COLORS['baseline']] + COLORS['ls_colors'] - for i, (k, l) in enumerate(zip(LS_KEYS, LS_LABELS_PLOT)): names.append(l); gaps.append(gm(k, 'loss_gap')); clrs.append(ls_c[i]) - for i, (k, l) in enumerate(zip(OP_KEYS, OP_LABELS_PLOT)): names.append(l); gaps.append(gm(k, 'loss_gap')); clrs.append(COLORS['op_colors'][i]) - bars = ax.bar(range(len(names)), gaps, color=clrs, width=0.65, edgecolor='none', zorder=3) - for b, v in zip(bars, gaps): ax.text(b.get_x()+b.get_width()/2, v+0.0005, f'{v:.4f}', ha='center', fontsize=10, fontweight='semibold', color=COLORS['text']) - ax.set_ylabel('Loss Gap', fontsize=12, fontweight='medium'); ax.set_title('Member vs Non-Member Loss Gap', fontsize=14, fontweight='bold', pad=20); ax.set_xticks(range(len(names))); ax.set_xticklabels(names, rotation=30, ha='right', fontsize=11); ax.annotate('Smaller gap = Better Privacy', xy=(8, gaps[0]*0.4), fontsize=11, color=COLORS['success'], fontstyle='italic', ha='center', backgroundcolor=COLORS['bg'], bbox=dict(boxstyle='round,pad=0.4', facecolor=COLORS['panel'], edgecolor=COLORS['success'], alpha=0.8)); plt.tight_layout() - return fig +# ================================================================ +# 效用图 +# ================================================================ +def fig_acc(): + ns,vs,cs=[],[],[];lc=[CL['base']]+CL['ls'] + for i,(k,l) in enumerate(zip(LK,LE)): + if k in UR:ns.append(l);vs.append(UR[k]['accuracy']*100);cs.append(lc[i]) + for i,(k,l) in enumerate(zip(OK,OE)): + if k in PR:ns.append(l);vs.append(BAC);cs.append(CL['op'][i]) + fig,ax=plt.subplots(figsize=(CW,CH));sax(fig,ax) + bars=ax.bar(range(len(ns)),vs,color=cs,width=0.62,edgecolor='white',lw=2,zorder=3) + for b,v in zip(bars,vs):ax.text(b.get_x()+b.get_width()/2,v+0.5,f'{v:.1f}%',ha='center',fontsize=8.5,fontweight='bold',color=CL['tx']) + ax.set_ylabel('Accuracy %',fontsize=11,fontweight='600');ax.set_title('[D5] Model Utility (300 Questions)',fontsize=12,fontweight='700',pad=12);ax.set_ylim(0,100) + ax.set_xticks(range(len(ns)));ax.set_xticklabels(ns,rotation=30,ha='right',fontsize=8.5);plt.tight_layout();return fig # ================================================================ -# 回调函数 +# Gauge + Loss分布(全部LS) # ================================================================ -def cb_sample(src): - pool = member_data if "训练集" in src else non_member_data - s = pool[np.random.randint(len(pool))] - m = s['metadata'] - md = f""" - - - - - - - - - - -
字段
姓名{clean_text(str(m.get('name','')))}
学号{clean_text(str(m.get('student_id','')))}
班级{clean_text(str(m.get('class','')))}
成绩{clean_text(str(m.get('score','')))} 分
类型{TYPE_CN.get(s.get('task_type',''), '')}
- """ - return md, clean_text(s.get('question', '')), clean_text(s.get('answer', '')) - -ATK_CHOICES = ( - ["基线模型 (Baseline)"] + - [f"标签平滑 (ε={e})" for e in [0.02, 0.05, 0.1, 0.2]] + - [f"输出扰动 (σ={s})" for s in OP_SIGMAS] -) -ATK_MAP = {"基线模型 (Baseline)": "baseline"} -for e in [0.02, 0.05, 0.1, 0.2]: ATK_MAP[f"标签平滑 (ε={e})"] = f"smooth_eps_{e}" -for s in OP_SIGMAS: ATK_MAP[f"输出扰动 (σ={s})"] = f"perturbation_{s}" - -def cb_attack(idx, src, target): - is_mem = "训练集" in src - pool = member_data if is_mem else non_member_data - idx = min(int(idx), len(pool)-1) - sample = pool[idx] - key = ATK_MAP.get(target, "baseline") - is_op = key.startswith("perturbation_") +def fig_gauge(lv,mm,nm,thr,ms_v,ns_v): + fig,ax=plt.subplots(figsize=(10,2.6));fig.patch.set_facecolor('white');ax.set_facecolor('white') + xlo=min(mm-3.5*ms_v,lv-0.005);xhi=max(nm+3.5*ns_v,lv+0.005) + ax.axvspan(xlo,thr,alpha=0.06,color=CL['blue']);ax.axvspan(thr,xhi,alpha=0.06,color=CL['red']) + ax.axvline(thr,color=CL['tx'],lw=2,zorder=3);ax.text(thr,1.08,f'Threshold={thr:.4f}',ha='center',va='bottom',fontsize=9,fontweight='bold',color=CL['tx'],transform=ax.get_xaxis_transform()) + mc=CL['blue'] if lv{ic} {nm}{ct(str(vl))}" + return h+"",ct(s.get('question','')),ct(s.get('answer','')) + +ATK_C=["\u57fa\u7ebf\u6a21\u578b (Baseline)"]+[f"\u6807\u7b7e\u5e73\u6ed1 (\u03b5={e})" for e in [0.02,0.05,0.1,0.2]]+[f"\u8f93\u51fa\u6270\u52a8 (\u03c3={s})" for s in OS] +ATK_M={"\u57fa\u7ebf\u6a21\u578b (Baseline)":"baseline"} +for e in [0.02,0.05,0.1,0.2]:ATK_M[f"\u6807\u7b7e\u5e73\u6ed1 (\u03b5={e})"]=f"smooth_eps_{e}" +for s in OS:ATK_M[f"\u8f93\u51fa\u6270\u52a8 (\u03c3={s})"]=f"perturbation_{s}" + +def cb_attack(idx,src,target): + is_mem="\u8bad\u7ec3" in src;pool=mem if is_mem else nmem;idx=min(int(idx),len(pool)-1);sample=pool[idx] + key=ATK_M.get(target,"baseline");is_op=key.startswith("perturbation_") if is_op: - sigma = float(key.split("_")[1]) - fr = full_losses.get('baseline', {}) - lk = 'member_losses' if is_mem else 'non_member_losses' - ll = fr.get(lk, []) - base_loss = ll[idx] if idx < len(ll) else float(np.random.normal(bl_m_mean if is_mem else bl_nm_mean, 0.02)) - np.random.seed(idx*1000 + int(sigma*10000)) - loss = base_loss + np.random.normal(0, sigma) - - mm = gm(key, "member_loss_mean", 0.19) - nm_m = gm(key, "non_member_loss_mean", 0.20) - ms = gm(key, "member_loss_std", np.sqrt(0.03**2 + sigma**2)) - ns = gm(key, "non_member_loss_std", np.sqrt(0.03**2 + sigma**2)) - auc_v = gm(key, "auc") - lbl = f"OP(σ={sigma})" - else: - info = mia_results.get(key, mia_results.get('baseline', {})) - fr = full_losses.get(key, full_losses.get('baseline', {})) - lk = 'member_losses' if is_mem else 'non_member_losses' - ll = fr.get(lk, []) - loss = ll[idx] if idx < len(ll) else float(np.random.normal(info.get('member_loss_mean',0.19), 0.02)) - mm = info.get('member_loss_mean', 0.19); nm_m = info.get('non_member_loss_mean', 0.20) - ms = info.get('member_loss_std', 0.03); ns = info.get('non_member_loss_std', 0.03) - auc_v = info.get('auc', 0) - lbl = "Baseline" if key == "baseline" else f"LS(ε={key.replace('smooth_eps_','')})" - - thr = (mm + nm_m) / 2 - pred = loss < thr - correct = pred == is_mem - gauge = fig_gauge(loss, mm, nm_m, thr, ms, ns) - - pl = "🔴 训练成员" if pred else "🟢 非训练成员" - al = "🔴 训练成员" if is_mem else "🟢 非训练成员" - - if correct and pred and is_mem: - v = f"
⚠️ 攻击成功:隐私泄露
模型对该样本过于熟悉(Loss < 阈值),攻击者成功判定为训练数据。
" - elif correct: - v = f"
判定正确
攻击者判定与真实身份一致。
" + sigma=float(key.split("_")[1]);fr=FL.get('baseline',{});lk='member_losses' if is_mem else 'non_member_losses';ll=fr.get(lk,[]) + bl=ll[idx] if idx🛡️ 防御成功
攻击者判定错误,防御起到了保护作用。" - - table_html = f""" - - - - - - - - - - - - - - - - - - - - -
项目攻击者判定真实身份
身份{pl}{al}
Loss / 阈值Loss: {loss:.4f}阈值: {thr:.4f}
- """ - - res = v + f"
🎯 攻击目标: {lbl} 📊 AUC: {auc_v:.4f}
" + table_html - qtxt = f"**样本 #{idx}**\n\n" + clean_text(sample.get('question',''))[:500] - return qtxt, gauge, res - -EVAL_CHOICES = ( - ["基线模型"] + - [f"标签平滑 (ε={e})" for e in [0.02, 0.05, 0.1, 0.2]] + - [f"输出扰动 (σ={s})" for s in OP_SIGMAS] -) -EVAL_KEY_MAP = {"基线模型": "baseline"} -for e in [0.02, 0.05, 0.1, 0.2]: EVAL_KEY_MAP[f"标签平滑 (ε={e})"] = f"smooth_eps_{e}" -for s in OP_SIGMAS: EVAL_KEY_MAP[f"输出扰动 (σ={s})"] = "baseline" - -def cb_eval(model_choice): - k = EVAL_KEY_MAP.get(model_choice, "baseline") - acc = gu(k) if "输出扰动" not in model_choice else bl_acc - q = EVAL_POOL[np.random.randint(len(EVAL_POOL))] - ok = q.get(k, q.get('baseline', False)) - ic = "✅ 正确" if ok else "❌ 错误" - note = "\n\n> 输出扰动不改变模型参数,准确率与基线一致。" if "输出扰动" in model_choice else "" - table_html = f""" - - - - - - - -
类型{q['type_cn']}
题目{q['question']}
正确答案{q['answer']}
判定{ic}
- """ - return (f"
🤖 模型: {model_choice} 🎯 准确率: {acc:.1f}%
" + table_html + note) - -def build_full_table(): - rows = [] - for k, l in zip(LS_KEYS, LS_LABELS_MD): - if k in mia_results: - m = mia_results[k]; u = gu(k) - t = "—" if k == "baseline" else "训练期"; d = "" if k == "baseline" else f"{m['auc']-bl_auc:+.4f}" - rows.append(f"| {l} | {t} | {m['auc']:.4f} | {m['attack_accuracy']:.4f} | {m['precision']:.4f} | {m['recall']:.4f} | {m['f1']:.4f} | {m['tpr_at_5fpr']:.4f} | {m['tpr_at_1fpr']:.4f} | {m['loss_gap']:.4f} | {u:.1f}% | {d} |") - for k, l in zip(OP_KEYS, OP_LABELS_MD): - if k in perturb_results: - m = perturb_results[k]; d = f"{m['auc']-bl_auc:+.4f}" - rows.append(f"| {l} | 推理期 | {m['auc']:.4f} | {m['attack_accuracy']:.4f} | {m['precision']:.4f} | {m['recall']:.4f} | {m['f1']:.4f} | {m['tpr_at_5fpr']:.4f} | {m['tpr_at_1fpr']:.4f} | {m['loss_gap']:.4f} | {bl_acc:.1f}% | {d} |") - header = ("| 策略 | 类型 | AUC | Acc | Prec | Rec | F1 | TPR@5% | TPR@1% | LossGap | 效用 | ΔAUC |\n" - "|---|---|---|---|---|---|---|---|---|---|---|---|") - return header + "\n" + "\n".join(rows) + info=MR.get(key,MR.get('baseline',{}));fr=FL.get(key,FL.get('baseline',{}));lk='member_losses' if is_mem else 'non_member_losses';ll=fr.get(lk,[]) + loss=ll[idx] if idx\U0001f3af \u653b\u51fb\u76ee\u6807: {lbl} | AUC: {av:.4f}
\u653b\u51fb\u8005\u5224\u5b9a\u771f\u5b9e\u8eab\u4efd
\u8eab\u4efd{pl}{al}
Loss{loss:.4f}\u9608\u503c: {thr:.4f}
""" + return f"\U0001f4cb \u6837\u672c **#{idx}**\n\n{ct(sample.get('question',''))[:500]}",gauge,r + +EV_C=["\u57fa\u7ebf\u6a21\u578b"]+[f"\u6807\u7b7e\u5e73\u6ed1 (\u03b5={e})" for e in [0.02,0.05,0.1,0.2]]+[f"\u8f93\u51fa\u6270\u52a8 (\u03c3={s})" for s in OS] +EV_M={"\u57fa\u7ebf\u6a21\u578b":"baseline"}; +for e in [0.02,0.05,0.1,0.2]:EV_M[f"\u6807\u7b7e\u5e73\u6ed1 (\u03b5={e})"]=f"smooth_eps_{e}" +for s in OS:EV_M[f"\u8f93\u51fa\u6270\u52a8 (\u03c3={s})"]="baseline" +def cb_eval(mc): + k=EV_M.get(mc,"baseline");acc=gu(k) if "\u8f93\u51fa\u6270\u52a8" not in mc else BAC;q=EP[np.random.randint(len(EP))];ok=q.get(k,q.get('baseline',False));ic="\u2705 \u6b63\u786e" if ok else "\u274c \u9519\u8bef" + return f"""
\u6a21\u578b: {mc} | \u51c6\u786e\u7387: {acc:.1f}%
\u7c7b\u578b{q['tc']}
\u9898\u76ee{q['question']}
\u7b54\u6848{q['answer']}
\u5224\u5b9a{ic}
""" + +def build_table(): + rows=[] + for k,l in zip(LK,["\u57fa\u7ebf","LS(\u03b5=0.02)","LS(\u03b5=0.05)","LS(\u03b5=0.1)","LS(\u03b5=0.2)"]): + if k in MR:m=MR[k];u=gu(k);t="\u2014" if k=="baseline" else "\u8bad\u7ec3\u671f";d="" if k=="baseline" else f"{m['auc']-BA:+.4f}";rows.append(f"| {l} | {t} | {m['auc']:.4f} | {m['attack_accuracy']:.4f} | {m['precision']:.4f} | {m['recall']:.4f} | {m['f1']:.4f} | {m['tpr_at_5fpr']:.4f} | {m['tpr_at_1fpr']:.4f} | {m['loss_gap']:.4f} | {u:.1f}% | {d} |") + for k,l in zip(OK,[f"OP(\u03c3={s})" for s in OS]): + if k in PR:m=PR[k];d=f"{m['auc']-BA:+.4f}";rows.append(f"| {l} | \u63a8\u7406\u671f | {m['auc']:.4f} | {m['attack_accuracy']:.4f} | {m['precision']:.4f} | {m['recall']:.4f} | {m['f1']:.4f} | {m['tpr_at_5fpr']:.4f} | {m['tpr_at_1fpr']:.4f} | {m['loss_gap']:.4f} | {BAC:.1f}% | {d} |") + return "| \u7b56\u7565 | \u7c7b\u578b | AUC | Acc | Prec | Rec | F1 | TPR@5% | TPR@1% | Gap | \u6548\u7528 | \u0394AUC |\n|---|---|---|---|---|---|---|---|---|---|---|---|\n"+"\n".join(rows) # ================================================================ -# CSS - 简约苹果风 +# CSS # ================================================================ -CSS = """ -:root { - --primary-blue: #007AFF; - --bg-light: #F5F5F7; - --card-bg: #FFFFFF; - --text-dark: #1D1D1F; - --text-gray: #86868B; - --border-color: #D2D2D7; -} - -body { background-color: var(--bg-light) !important; font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica, Arial, sans-serif !important; color: var(--text-dark) !important; } -.gradio-container { max-width: 1350px !important; margin: 40px auto !important; } -.title-area { background-color: var(--card-bg); padding: 32px 40px; border-radius: 18px; box-shadow: 0 4px 12px rgba(0, 0, 0, 0.05); margin-bottom: 30px; text-align: center; } -.title-area h1 { color: var(--text-dark) !important; font-size: 2.2rem !important; font-weight: 700 !important; margin-bottom: 10px !important; letter-spacing: -0.5px; } -.title-area p { color: var(--text-gray) !important; font-size: 1.1rem !important; margin-bottom: 15px !important; } -.title-area .badge { display: inline-block; background-color: #E5F1FF; color: var(--primary-blue); padding: 6px 16px; border-radius: 20px; font-size: 0.9rem; font-weight: 600; } -.tabitem { background-color: var(--card-bg) !important; border-radius: 18px !important; border: none !important; box-shadow: 0 8px 24px rgba(0, 0, 0, 0.08) !important; padding: 40px !important; margin-top: 20px !important; } -.tab-nav { border-bottom: none !important; gap: 10px !important; background: transparent !important; padding-bottom: 5px !important; } -.tab-nav button { font-size: 15px !important; padding: 10px 20px !important; font-weight: 500 !important; color: var(--text-gray) !important; background: rgba(0,0,0,0.03) !important; border: none !important; border-radius: 12px !important; transition: all 0.2s ease !important; } -.tab-nav button:hover { background: rgba(0,0,0,0.06) !important; color: var(--text-dark) !important; } -.tab-nav button.selected { color: var(--primary-blue) !important; background: #E5F1FF !important; font-weight: 600 !important; } -.prose { color: var(--text-dark) !important; } -.prose h2 { color: var(--text-dark) !important; font-weight: 700 !important; border-bottom: 1px solid var(--border-color) !important; padding-bottom: 12px !important; margin-top: 30px !important; } -.prose h3 { color: var(--text-dark) !important; font-weight: 600 !important; margin-top: 24px !important; } -.prose h4 { color: var(--text-gray) !important; font-weight: 600 !important; margin-bottom: 12px !important; } -.prose table { border-collapse: separate !important; border-spacing: 0 !important; width: 100% !important; border: 1px solid var(--border-color) !important; border-radius: 12px !important; overflow: hidden !important; box-shadow: 0 2px 8px rgba(0,0,0,0.04) !important; } -.prose th { background: #F9F9F9 !important; color: var(--text-gray) !important; font-weight: 600 !important; padding: 14px 18px !important; text-align: left !important; border-bottom: 1px solid var(--border-color) !important; white-space: nowrap !important; } -.prose td { padding: 14px 18px !important; color: var(--text-dark) !important; border-bottom: 1px solid var(--border-color) !important; background: var(--card-bg) !important; white-space: nowrap !important; } -.prose tr:last-child td { border-bottom: none !important; } -.prose tr:hover td { background: #F5F7FA !important; } -button.primary { background-color: var(--primary-blue) !important; color: white !important; border: none !important; border-radius: 10px !important; font-weight: 600 !important; padding: 12px 24px !important; box-shadow: 0 2px 6px rgba(0, 122, 255, 0.25) !important; transition: all 0.2s !important; } -button.primary:hover { background-color: #0062CC !important; box-shadow: 0 4px 10px rgba(0, 122, 255, 0.35) !important; transform: translateY(-1px) !important; } -.card-wrap { background: var(--card-bg) !important; border: 1px solid var(--border-color) !important; border-radius: 14px !important; padding: 24px !important; box-shadow: 0 2px 8px rgba(0,0,0,0.04) !important; } -.block.svelte-12cmxck { border-radius: 12px !important; border-color: var(--border-color) !important; } -.input-label { color: var(--text-gray) !important; font-weight: 500 !important; } -footer { display: none !important; } +CSS=""" +@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700;800&display=swap'); +body{background:#F7F8FA !important;font-family:'Inter',-apple-system,sans-serif !important} +.gradio-container{max-width:1260px !important;margin:20px auto !important} +.hero-box{background:white;border-radius:16px;padding:32px 40px;margin-bottom:20px;border:1px solid #E8ECF1;box-shadow:0 1px 3px rgba(0,0,0,0.04)} +.hero-box h1{color:#1D2939 !important;font-size:1.65rem !important;font-weight:800 !important;margin:0 0 4px !important} +.hero-box .sub{color:#667085;font-size:0.92rem;margin:0 0 14px} +.hero-box .badges{display:flex;gap:8px;flex-wrap:wrap} +.hero-box .badge{padding:4px 14px;border-radius:20px;font-size:0.75rem;font-weight:600} +.b1{background:#EFF8FF;color:#175CD3;border:1px solid #B2DDFF}.b2{background:#F4F3FF;color:#5925DC;border:1px solid #D9D6FE}.b3{background:#F0FDF9;color:#107569;border:1px solid #99F6E4} +.dim-label{display:inline-block;padding:3px 10px;border-radius:6px;font-size:11px;font-weight:700;letter-spacing:0.05em;margin-right:8px} +.dim1{background:#FEF3F2;color:#B42318}.dim2{background:#FFFAEB;color:#B54708}.dim3{background:#F4F3FF;color:#5925DC}.dim4{background:#EFF8FF;color:#175CD3}.dim5{background:#F0FDF9;color:#107569} +.tabitem{background:white !important;border-radius:0 0 14px 14px !important;border:1px solid #E8ECF1 !important;border-top:none !important;box-shadow:0 1px 3px rgba(0,0,0,0.04) !important;padding:32px 36px !important;min-height:700px !important} +.tab-nav{border-bottom:none !important;gap:2px !important} +.tab-nav button{font-size:14px !important;padding:11px 20px !important;font-weight:600 !important;color:#667085 !important;background:#F2F4F7 !important;border:none !important;border-radius:10px 10px 0 0 !important} +.tab-nav button:hover{color:#2E90FA !important;background:#EFF8FF !important} +.tab-nav button.selected{color:#175CD3 !important;background:white !important;box-shadow:0 -2px 0 0 #2E90FA inset !important} +.prose{color:#344054 !important}.prose h2{font-size:1.3rem !important;color:#1D2939 !important;font-weight:700 !important;margin-top:0 !important;padding-bottom:10px !important;border-bottom:2px solid #F2F4F7 !important} +.prose h3{font-size:1.05rem !important;color:#344054 !important;font-weight:600 !important} +.prose table{width:100% !important;border-collapse:separate !important;border-spacing:0 !important;border-radius:10px !important;overflow:hidden !important;border:1px solid #E8ECF1 !important;font-size:0.82rem !important} +.prose th{background:#F9FAFB !important;color:#475467 !important;font-weight:600 !important;padding:9px 12px !important;font-size:0.72rem !important;text-transform:uppercase;letter-spacing:0.04em;border-bottom:2px solid #E8ECF1 !important} +.prose td{padding:9px 12px !important;color:#344054 !important;border-bottom:1px solid #F2F4F7 !important}.prose tr:hover td{background:#F9FAFB !important} +button.primary{background:linear-gradient(135deg,#2E90FA,#1570EF) !important;color:white !important;border:none !important;border-radius:10px !important;font-weight:700 !important;box-shadow:0 1px 3px rgba(46,144,250,0.3) !important} +button.primary:hover{box-shadow:0 4px 12px rgba(46,144,250,0.35) !important;transform:translateY(-1px) !important} +.prose blockquote{border-left:3px solid #2E90FA !important;background:#EFF8FF !important;padding:12px 16px !important;border-radius:0 8px 8px 0 !important;color:#1849A9 !important;font-size:0.88rem !important} +footer{display:none !important} """ # ================================================================ -# UI 布局 +# UI - 按5维度组织 # ================================================================ -with gr.Blocks(title="MIA攻防研究") as demo: - - gr.HTML("""
-

🎓 教育大模型中的成员推理攻击及其防御研究

-

Membership Inference Attack & Defense on Educational LLM

-
✨ 11组实验 × 8维指标 × 2种策略
-
""") - - with gr.Tab("📊 实验总览"): - gr.Markdown(f""" -## 📌 研究背景:为什么教育大模型需要防范 MIA? - -在教育领域,大模型(如虚拟辅导老师)的训练往往离不开学生真实的互动数据,而这些数据中包含了大量**极度敏感的个人隐私**。本研究基于 **{model_name}** 微调的数学辅导模型,系统揭示并解决这一安全隐患。 - -### 1️⃣ 什么是成员推理攻击 (MIA)? -**成员推理攻击 (Membership Inference Attack)** 的核心目的,是判断“某一条特定的数据,到底有没有被用来训练过这个AI?” -* **测谎仪原理**:大模型有一种“偷懒”的天性,对于它在训练时见过的“旧题”(成员数据),它回答得会极其顺畅,**损失值(Loss)非常低**;而面对没见过的“新题”(非成员数据),Loss 会偏高。攻击者正是利用这个 Loss 差距来做判定。 - -### 2️⃣ 教育大模型中的 MIA 危害有多大?(结合实验数据) -想象一下,我们系统后台有这样一条真实的训练数据: -> *“老师您好,我是**李明(学号20231001)**。我上次数学只考了**55分**,计算题老是错,请问 25+37 等于多少?”* - -如果学校直接用这些记录训练了AI,恶意攻击者就可以拿着这句话去“套话”。如果 AI 表现出“极度熟悉”(Loss极低),攻击者就能推断出:**“李明确实在这个学校,且上次数学不及格。”** 学生的姓名、学号、成绩短板等核心隐私将彻底暴露! - -### 3️⃣ 我们如何进行防御? -为了打破攻击者的“测谎仪”,本研究引入了两大防御流派,并探讨了它们在保护隐私与维持 AI 教学智商(效用)之间的平衡: -* 🛡️ **标签平滑 (Label Smoothing, 训练期)**:从小教育 AI“不要死记硬背”。在训练时强行引入不确定性,逼迫 AI 去学习加减乘除的通用规律,而不是死记李明的名字和分数。 -* 🛡️ **输出扰动 (Output Perturbation, 推理期)**:给 AI 的输出加上“变声器”。在攻击者探查 Loss 值时,强行混入高斯噪声(加沙子),让攻击者看到的 Loss 忽高忽低,彻底瞎掉,但普通用户看到的文字回答依然绝对正确。 -""") - - if os.path.exists(os.path.join(BASE_DIR, "figures", "algo4_overview_cn_final.png")): - gr.Image(os.path.join(BASE_DIR, "figures", "algo4_overview_cn_final.png"), label="实验体系总览", show_label=True) - - gr.HTML(f"""
-
-
5
-
训练模型
-
🤖
-
-
-
6
-
扰动配置
-
🎛️
-
-
-
8
-
评估指标
-
📈
-
-
-
2000
-
测试样本
-
📄
-
-
""") - - with gr.Accordion("📋 完整实验结果表(11组 × 8维度)", open=True): - gr.Markdown(build_full_table()) - - with gr.Tab("📁 数据与模型"): - gr.HTML("""
-
-

📦 数据组成

- - - - -
数据组数量用途说明
🔴 成员数据1000条模型训练Loss偏低
🟢 非成员数据1000条攻击对照Loss偏高
-
-
-

📚 任务分布

- - - - - - -
类别数量占比
🔢 基础计算80040%
📝 应用题60030%
💬 概念问答40020%
✏️ 错题订正20010%
-
""") - gr.HTML(f'
⚠️ 注意:两组数据格式完全相同(均含隐私字段),这是MIA实验的标准设置——攻击者无法从格式区分。
') - gr.Markdown("### 🔍 数据样例提取") - with gr.Row(): - with gr.Column(scale=1): - gr.Markdown("#### ⚙️ 提取控制台") - d_src = gr.Radio(["成员数据(训练集)", "非成员数据(测试集)"], value="成员数据(训练集)", label="目标数据源") - d_btn = gr.Button("🎲 随机提取样本", variant="primary") - d_meta = gr.HTML() +with gr.Blocks(title="MIA\u653b\u9632\u7814\u7a76",theme=gr.themes.Soft(),css=CSS) as demo: + gr.HTML("""

\U0001f393 \u6559\u80b2\u5927\u6a21\u578b\u4e2d\u7684\u6210\u5458\u63a8\u7406\u653b\u51fb\u53ca\u5176\u9632\u5fa1\u7814\u7a76

Membership Inference Attack & Defense on Educational LLM

\U0001f4ca 11 Experiments\U0001f9ea 8 Metrics\U0001f6e1\ufe0f 2 Defenses
""") + + with gr.Tab("\U0001f4ca \u5b9e\u9a8c\u603b\u89c8"): + gr.Markdown(f"## \u7814\u7a76\u80cc\u666f\u4e0e\u76ee\u6807\n\n\u672c\u7814\u7a76\u57fa\u4e8e **{mn}** \u5fae\u8c03\u7684\u6570\u5b66\u8f85\u5bfc\u6a21\u578b\uff0c\u7cfb\u7edf\u9a8c\u8bc1MIA\u98ce\u9669\u5e76\u8bc4\u4f30\u4e24\u7c7b\u9632\u5fa1\u7b56\u7565\u3002") + gr.HTML(f'
5
\u8bad\u7ec3\u6a21\u578b
6
\u6270\u52a8\u914d\u7f6e
8
\u8bc4\u4f30\u6307\u6807
2000
\u6d4b\u8bd5\u6837\u672c
') + if os.path.exists(os.path.join(BASE,"figures/algo4_overview.png")): + gr.Image(os.path.join(BASE,"figures/algo4_overview.png"),label="\u5b9e\u9a8c\u4f53\u7cfb\u603b\u89c8") + with gr.Accordion("\u5b8c\u6574\u5b9e\u9a8c\u7ed3\u679c\u8868\uff0811\u7ec4\u00d78\u6307\u6807\uff09",open=True): + gr.Markdown(build_table()) + + with gr.Tab("\U0001f4c1 \u6570\u636e\u4e0e\u6a21\u578b"): + gr.HTML('

\U0001f4e6 \u6570\u636e\u7ec4\u6210

\u6570\u636e\u7ec4\u6570\u91cf\u7528\u9014
\U0001f534 \u6210\u54581000\u8bad\u7ec3
\U0001f7e2 \u975e\u6210\u54581000\u5bf9\u7167

\U0001f4da \u4efb\u52a1\u5206\u5e03

\u7c7b\u522b\u6570\u91cf\u5360\u6bd4
\u8ba1\u7b9780040%
\u5e94\u7528\u989860030%
\u6982\u5ff540020%
\u9519\u989820010%
') + gr.Markdown("### \u6570\u636e\u6837\u4f8b\u6d4f\u89c8") + with gr.Row(equal_height=True): with gr.Column(scale=2): - gr.Markdown("#### 📄 样本详情") - d_q = gr.Textbox(label="🧑‍🎓 学生提问 (Prompt)", lines=6, interactive=False) - d_a = gr.Textbox(label="💡 标准回答 (Ground Truth)", lines=6, interactive=False) - d_btn.click(cb_sample, [d_src], [d_meta, d_q, d_a]) - - with gr.Tab("🧠 算法原理"): - gr.Markdown("## 算法流程图与伪代码") - - gr.Markdown("### Algorithm 1: 基于Loss的成员推理攻击 (MIA)") - if os.path.exists(os.path.join(BASE_DIR, "figures", "algo1_mia_attack.png")): - gr.Image(os.path.join(BASE_DIR, "figures", "algo1_mia_attack.png"), show_label=False) + d_src=gr.Radio(["\u6210\u5458\u6570\u636e\uff08\u8bad\u7ec3\u96c6\uff09","\u975e\u6210\u5458\u6570\u636e\uff08\u6d4b\u8bd5\u96c6\uff09"],value="\u6210\u5458\u6570\u636e\uff08\u8bad\u7ec3\u96c6\uff09",label="\u6570\u636e\u6765\u6e90");d_btn=gr.Button("\U0001f3b2 \u968f\u673a\u63d0\u53d6\u6837\u672c",variant="primary");d_meta=gr.HTML() + with gr.Column(scale=3): + d_q=gr.Textbox(label="\u5b66\u751f\u63d0\u95ee",lines=5,interactive=False);d_a=gr.Textbox(label="\u6807\u51c6\u56de\u7b54",lines=5,interactive=False) + d_btn.click(cb_sample,[d_src],[d_meta,d_q,d_a]) + + with gr.Tab("\U0001f9e0 \u7b97\u6cd5\u539f\u7406"): + gr.Markdown("## \u7b97\u6cd5\u6d41\u7a0b\u56fe\u4e0e\u4f2a\u4ee3\u7801") + for name,file,desc in [("Algorithm 1: \u57fa\u4e8eLoss\u7684\u6210\u5458\u63a8\u7406\u653b\u51fb","algo1_mia_attack.png","\u5229\u7528\u6a21\u578b\u5bf9\u8bad\u7ec3\u6570\u636eLoss\u66f4\u4f4e\u7684\u7279\u6027\uff0c\u901a\u8fc7\u9608\u503c\u5224\u5b9a\u6570\u636e\u5f52\u5c5e"),("Algorithm 2: \u6807\u7b7e\u5e73\u6ed1\u9632\u5fa1","algo2_label_smoothing.png","\u8f6f\u5316one-hot\u6807\u7b7e\u964d\u4f4e\u6a21\u578b\u8bb0\u5fc6\uff0c\u517c\u5177\u6b63\u5219\u5316\u6548\u5e94"),("Algorithm 3: \u8f93\u51fa\u6270\u52a8\u9632\u5fa1","algo3_output_perturbation.png","\u5728Loss\u8f93\u51fa\u4e0a\u52a0\u566a\u58f0\u906e\u853d\u653b\u51fb\u4fe1\u53f7\uff0c\u96f6\u6548\u7528\u635f\u5931")]: + gr.Markdown(f"### {name}\n\n> {desc}") + fp=os.path.join(BASE,f"figures/{file}") + if os.path.exists(fp):gr.Image(fp,show_label=False) + + with gr.Tab("\U0001f3af \u653b\u51fb\u9a8c\u8bc1"): + gr.Markdown("## \u6210\u5458\u63a8\u7406\u653b\u51fb\u4ea4\u4e92\u6f14\u793a") + with gr.Row(equal_height=True): + with gr.Column(scale=2): + a_t=gr.Radio(ATK_C,value=ATK_C[0],label="\u653b\u51fb\u76ee\u6807");a_s=gr.Radio(["\u6210\u5458\u6570\u636e\uff08\u8bad\u7ec3\u96c6\uff09","\u975e\u6210\u5458\u6570\u636e\uff08\u6d4b\u8bd5\u96c6\uff09"],value="\u6210\u5458\u6570\u636e\uff08\u8bad\u7ec3\u96c6\uff09",label="\u6570\u636e\u6765\u6e90") + a_i=gr.Slider(0,999,step=1,value=12,label="\u6837\u672cID");a_b=gr.Button("\u26a1 \u6267\u884c\u653b\u51fb",variant="primary",size="lg");a_qt=gr.Markdown() + with gr.Column(scale=3): + a_g=gr.Plot(label="Loss Decision Boundary");a_r=gr.HTML() + a_b.click(cb_attack,[a_i,a_s,a_t],[a_qt,a_g,a_r]) + + # ===== 核心Tab: 5维度攻防分析 ===== + with gr.Tab("\U0001f6e1\ufe0f \u4e94\u7ef4\u5ea6\u653b\u9632\u5206\u6790"): + gr.Markdown("## \u4e94\u7ef4\u5ea6\u653b\u9632\u6548\u679c\u5b8c\u6574\u8bba\u8bc1") + + # D1 + gr.HTML('
D1\u5b8f\u89c2\u8bc4\u4ef7\u7ef4\u5ea6 \u2014 \u8bc1\u660e\u201c\u603b\u4f53\u653b\u9632\u80fd\u529b\u201d
') gr.Markdown(f"""\ -> **原理讲解:** MIA利用了“模型对训练数据记忆更深”这一现象。当模型“见过”某条数据时,它的预测不确定性更低,表现为**Loss偏低**。攻击者正是利用这个差异来判断数据是否属于训练集。 +> **\u653b\u51fb\u6709\u6548\u7684\u8bc1\u636e\uff1a** \u57fa\u7ebfROC\u66f2\u7ebf\u660e\u663e\u5411\u5de6\u4e0a\u51f8\u8d77\uff0cAUC={BA:.4f}\uff0c\u663e\u8457\u9ad8\u4e8e\u968f\u673a\u731c\u6d4b\u76840.5\u3002\u8fd9\u76f4\u63a5\u8bc1\u660e\uff1a\u5982\u679c\u4e0d\u52a0\u9632\u5fa1\uff0c\u5927\u6a21\u578b\u786e\u5b9e\u8bb0\u4f4f\u4e86\u5b66\u751f\u7684\u9690\u79c1\u3002 > -> 本实验中,基线模型的成员平均Loss={bl_m_mean:.4f},非成员平均Loss={bl_nm_mean:.4f},差距{bl_nm_mean-bl_m_mean:.4f},足以被攻击者利用。 +> **\u9632\u5fa1\u6709\u6548\u7684\u8bc1\u636e\uff1a** \u65bd\u52a0\u9632\u5fa1\u540e\uff0cAUC\u67f1\u5b50\u663e\u8457\u53d8\u77ed\uff08\u6700\u4f4e\u964d\u81f3{gm('perturbation_0.03','auc'):.4f}\uff09\uff0cROC\u66f2\u7ebf\u51e0\u4e4e\u88ab\u538b\u5e73\u8d34\u8fd1\u5bf9\u89d2\u7ebf\u3002\u9632\u5fa1\u4ece\u6839\u672c\u4e0a\u74e6\u89e3\u4e86\u653b\u51fb\u6709\u6548\u6027\u3002 """) - - gr.Markdown("---\n### Algorithm 2: 标签平滑防御(训练期)") - if os.path.exists(os.path.join(BASE_DIR, "figures", "algo2_label_smoothing.png")): - gr.Image(os.path.join(BASE_DIR, "figures", "algo2_label_smoothing.png"), show_label=False) - gr.Markdown("""\ -> **原理讲解:** 标签平滑将one-hot硬标签软化为概率分布。例如,原始标签[0,0,1,0]变为[0.033,0.033,0.9,0.033]。这迫使模型不再“100%确定”某个答案,从而降低对训练数据的过度记忆。 + gr.Plot(value=fig_d1_auc()) + gr.Plot(value=fig_d1_roc()) + + # D2 + gr.HTML('
D2\u6781\u9650\u5b9e\u6218\u7ef4\u5ea6 \u2014 \u8bc1\u660e\u201c\u6781\u4f4e\u8bef\u62a5\u4e0b\u7684\u5b89\u5168\u5e95\u7ebf\u201d
') + gr.Markdown(f"""\ +> **\u4e3a\u4ec0\u4e48\u8fd9\u4e2a\u6307\u6807\u91cd\u8981\uff1a** \u73b0\u5b9e\u4e2d\u9ed1\u5ba2\u4e3a\u4e86\u4e0d\u6253\u8349\u60ca\u86c7\uff0c\u53ea\u5141\u8bb8\u6781\u4f4e\u7684\u8bef\u62a5\u3002\u5728Baseline\u4e2d\uff0c\u5373\u4f7f\u8bef\u62a5\u7387\u5361\u57285%\uff0c\u653b\u51fb\u8005\u4ecd\u80fd\u7cbe\u51c6\u7a83\u53d6**{gm('baseline','tpr_at_5fpr')*100:.1f}%**\u7684\u771f\u5b9e\u9690\u79c1\u3002 > -> 副作用:正则化效应还能防止过拟合,提升泛化能力。这就是为什么效用会反升的原因。 +> \u5f00\u542f\u9632\u5fa1\u540e\uff0c\u8fd9\u4e2a\u6210\u529f\u7387\u88ab\u6b7b\u6b7b\u538b\u5236\uff1aLS(\u03b5=0.2)\u964d\u81f3{gm('smooth_eps_0.2','tpr_at_5fpr')*100:.1f}%\uff0cOP(\u03c3=0.03)\u964d\u81f3{gm('perturbation_0.03','tpr_at_5fpr')*100:.1f}%\u3002\u8bc1\u660e\u9632\u5fa1\u5728\u6700\u6781\u7aef\u7684\u5b9e\u6218\u6761\u4ef6\u4e0b\u4f9d\u7136\u575a\u5982\u78d0\u77f3\u3002 """) - - gr.Markdown("---\n### Algorithm 3: 输出扰动防御(推理期)") - if os.path.exists(os.path.join(BASE_DIR, "figures", "algo3_output_perturbation.png")): - gr.Image(os.path.join(BASE_DIR, "figures", "algo3_output_perturbation.png"), show_label=False) + gr.Plot(value=fig_d2_tpr()) + + # D3 + gr.HTML('
D3\u673a\u5236\u6eaf\u6e90\u7ef4\u5ea6 \u2014 \u8bc1\u660e\u201c\u5e95\u5c42\u7269\u7406\u903b\u8f91\u201d
') + gr.Markdown(f"""\ +> **\u653b\u51fb\u7684\u6839\u6e90\uff1a** MIA\u653b\u51fb\u80fd\u6210\u529f\uff0c\u662f\u56e0\u4e3a\u6a21\u578b\u5bf9\u201c\u80cc\u8fc7\u201d\u7684\u6570\u636e\u7ed9\u7684Loss\u66f4\u4f4e\u3002\u4e0b\u9762\u7684\u4e09\u8054\u56fe\u76f4\u63a5\u5c55\u793a\u4e86\u8fd9\u4e2a\u73b0\u8c61\uff1a +> +> - **\u5de6\u56fe(Baseline)\uff1a** \u84dd\u7ea2\u4e24\u5ea7\u5c71\u5cf0\u660e\u663e\u9519\u4f4d\uff0cGap={gm('baseline','loss_gap'):.4f}\uff0c\u653b\u51fb\u8005\u53ef\u5229\u7528\u8fd9\u4e2a\u5dee\u5f02 +> - **\u4e2d\u56fe(LS)\uff1a** \u6807\u7b7e\u5e73\u6ed1\u540e\u4e24\u5ea7\u5c71\u5cf0\u8d8b\u4e8e\u91cd\u5408\uff0cGap\u7f29\u5c0f\u2014\u2014\u201c\u7269\u7406\u62b9\u9664\u201d\u4e86\u8bb0\u5fc6 +> - **\u53f3\u56fe(OP)\uff1a** \u566a\u58f0\u8ba9\u5206\u5e03\u53d8\u5f97\u6241\u5e73\u5bbd\u9614\uff0c\u7ea2\u84dd\u88ab\u5b8c\u5168\u6405\u6df7\u2014\u2014\u201c\u8499\u853d\u201d\u4e86\u653b\u51fb\u8005\u7684\u53cc\u773c +""") + gr.Plot(value=fig_d3_dist_compare()) + gr.Plot(value=fig_d3_gap()) + with gr.Accordion("\u6240\u6709\u6807\u7b7e\u5e73\u6ed1\u6a21\u578b\u7684Loss\u5206\u5e03",open=False): + gr.Plot(value=fig_d3_dist_all()) + + # D4 + gr.HTML('
D4\u65e0\u6b7b\u89d2\u538b\u5236\u7ef4\u5ea6 \u2014 \u8bc1\u660e\u201c\u9632\u5fa1\u6ca1\u6709\u504f\u79d1\u201d
') gr.Markdown("""\ -> **原理讲解:** 输出扰动不修改模型本身,而是在返回给攻击者的Loss值上加入随机噪声。攻击者看到的是被噪声污染的Loss,无法精确判断是否低于阈值。 +> \u7ea2\u8272\u57fa\u7ebf\u5708\u9762\u79ef\u6700\u5927 = \u653b\u51fb\u5728\u6240\u6709\u7ef4\u5ea6\u90fd\u5f88\u5f3a\u3002\u9632\u5fa1\u540e\u6574\u4e2a\u591a\u8fb9\u5f62\u5728\u6781\u5176\u5747\u5300\u5730\u5411\u5185\u6536\u7f29\u2014\u2014\u8fd9\u8bc1\u660e\u9632\u7ebf\u662f360\u00b0\u65e0\u6b7b\u89d2\u7684\uff0c\u4e0d\u662f\u64cd\u7eb5\u67d0\u4e00\u4e2a\u5355\u4e00\u6307\u6807\u5f97\u51fa\u7684\u7ed3\u8bba\u3002 > -> 优势:①不需重新训练 ②即插即用 ③不影响模型回答质量(因为只扰动Loss,不扰动生成结果) +> **\u5de6\u56fe\uff1a** 5\u4e2aLS\u6a21\u578b\u3002\u53f3\u56fe\uff1a7\u4e2aOP\u914d\u7f6e\u3002\u5168\u90e811\u7ec4\u5b9e\u9a8c\u5747\u5c55\u793a\u3002 """) + gr.Plot(value=fig_d4_radar()) - with gr.Tab("🎯 攻击验证"): - gr.Markdown("## 🕵️ 成员推理攻击交互演示\n\n配置攻击目标与数据源,系统将执行 Loss 计算并映射判定边界。") - with gr.Row(): - with gr.Column(scale=1): - gr.Markdown("#### ⚙️ 攻击配置台") - a_t = gr.Dropdown(choices=ATK_CHOICES, value=ATK_CHOICES[0], label="🎯 选择被攻击模型", interactive=True) - a_s = gr.Radio(["成员数据(训练集)", "非成员数据(测试集)"], value="成员数据(训练集)", label="📂 输入数据源") - a_i = gr.Slider(0, 999, step=1, value=12, label="📌 定位样本 ID") - a_b = gr.Button("⚡ 执行成员推理攻击", variant="primary") - a_qt = gr.HTML() - with gr.Column(scale=2): - gr.Markdown("#### 📉 攻击结果与 Loss 边界") - a_g = gr.Plot(label="Loss位置判定 (Decision Boundary)") - a_r = gr.HTML() - a_b.click(cb_attack, [a_i, a_s, a_t], [a_qt, a_g, a_r]) - - with gr.Tab("🛡️ 防御分析"): - gr.Markdown("## 🔍 多维度攻防效果对比分析") - gr.Markdown(f"### 1️⃣ 攻击成功率全景对比 (AUC)\n\n> 柱子越短 = AUC越低 = 防御越有效。基线AUC={bl_auc:.4f},标签平滑最低降至{gm('smooth_eps_0.2','auc'):.4f},输出扰动最低降至{gm('perturbation_0.03','auc'):.4f}。") - gr.Plot(value=fig_auc_bar()) - + # D5 + gr.HTML('
D5\u843d\u5730\u4ee3\u4ef7\u7ef4\u5ea6 \u2014 \u8bc1\u660e\u201c\u9690\u79c1\u4e0e\u6548\u7528\u7684\u5e73\u8861\u201d
') gr.Markdown(f"""\ -### 2️⃣ 多指标雷达图对比(全部11组实验) - -> **左图:标签平滑系列5个模型** -> - 红色(Baseline)面积最大 = 攻击全面有效 -> - 随着 ε 从 0.02 增至 0.2,雷达面积逐步缩小 = 防御逐步增强 -> - 特别注意 TPR@1%FPR 和 LossGap 两个轴,缩小最显著 +> **LS(\u6807\u7b7e\u5e73\u6ed1)\uff1a** \u7ea2\u7ebf(AUC)\u6301\u7eed\u4e0b\u964d + \u7eff\u7ebf(\u6548\u7528)\u9006\u52bf\u4e0a\u626c\u81f3{gu('smooth_eps_0.2'):.1f}% = **\u53cc\u8d62(Win-Win)** > -> **右图:输出扰动系列7个配置** -> - 红色(Baseline)同样是最大的 -> - 随着 σ 从 0.005 增至 0.03,绿色系雷达逐步缩小 -> - OP在LossGap和TPR@5%维度上降幅尤其明显 +> **OP(\u8f93\u51fa\u6270\u52a8)\uff1a** AUC\u6301\u7eed\u4e0b\u964d + \u6548\u7528\u7ebf\u59cb\u7ec8\u6c34\u5e73\u4e0d\u53d8 = **\u96f6\u6548\u7528\u635f\u8017** > -> **结论:** 两种防御均在所有维度上全面压制攻击能力,不是只降低了某一个指标。 +> \u4ece\u6563\u70b9\u56fe\u770b\uff0c\u57fa\u7ebf\u5904\u4e8e\u201c\u4f4e\u6548\u7528+\u9ad8\u98ce\u9669\u201d\u7684\u5371\u9669\u533a\uff0cLS\u5411\u53f3\u4e0b\u65b9\u79fb\u52a8\uff08\u7406\u60f3\u533a\uff09\uff0cOP\u5411\u6b63\u4e0b\u65b9\u79fb\u52a8\u3002 """) - gr.Plot(value=fig_radar()) - - gr.Markdown("### 3️⃣ ROC曲线对比\n\n> 曲线越贴近对角线=攻击越接近随机猜测=防御越有效。左图标签平滑,右图输出扰动。") - gr.Plot(value=fig_roc_curves()) - gr.Markdown(f"### 4️⃣ 低误报率下的攻击能力\n\n> 基线 TPR@5%FPR={gm('baseline','tpr_at_5fpr'):.4f},防御后显著下降。这是衡量攻击危害的最严格指标。") - gr.Plot(value=fig_tpr_at_low_fpr()) - gr.Markdown("### 5️⃣ Loss差距对比\n\n> Gap越小=成员与非成员越难区分=防御越有效。防御的目标就是缩小这个差距。") - gr.Plot(value=fig_loss_gap_waterfall()) - gr.Markdown("### 6️⃣ 防御参数与效果关系\n\n> 左图:AUC递减+效用反升=双赢。右图:绿色填充面积=防御收益。") - gr.Plot(value=fig_auc_trend()) - - with gr.Accordion("📉 Loss分布直方图(标签平滑 5模型)", open=False): - gr.Plot(value=fig_loss_dist()) - with gr.Accordion("📉 Loss分布直方图(输出扰动 6组)", open=False): - gr.Plot(value=fig_perturb_dist()) - - with gr.Accordion("📖 每个模型/参数详细分析", open=False): - detail_md = "## 逐一详细分析\n\n" - detail_md += f"""\ -### 基线模型 (Baseline, 无防御) -| 指标 | 值 | 含义 | -|---|---|---| -| AUC | **{gm('baseline','auc'):.4f}** | 攻击明显优于随机猜测(0.5) | -| 攻击准确率 | **{gm('baseline','attack_accuracy'):.4f}** | 超过60%的样本被正确判定 | -| 精确率 | **{gm('baseline','precision'):.4f}** | 攻击者判定为成员的样本中,{gm('baseline','precision')*100:.1f}%确实是成员 | -| 召回率 | **{gm('baseline','recall'):.4f}** | 所有真正成员中,{gm('baseline','recall')*100:.1f}%被成功识别 | -| F1 | **{gm('baseline','f1'):.4f}** | 精确率和召回率的调和平均 | -| TPR@5%FPR | **{gm('baseline','tpr_at_5fpr'):.4f}** | 低误报下仍能识别{gm('baseline','tpr_at_5fpr')*100:.1f}%成员 | -| TPR@1%FPR | **{gm('baseline','tpr_at_1fpr'):.4f}** | 极低误报下识别{gm('baseline','tpr_at_1fpr')*100:.1f}%成员 | -| Loss差距 | **{gm('baseline','loss_gap'):.4f}** | 攻击可利用的信号强度 | -| 效用 | **{gu('baseline'):.1f}%** | 300道测试题准确率 | - ---- -""" - for eps in [0.02, 0.05, 0.1, 0.2]: - k = f"smooth_eps_{eps}" - detail_md += f"""\ -### 标签平滑 LS(ε={eps}) -| 指标 | 值 | vs基线 | 变化 | -|---|---|---|---| -| AUC | {gm(k,'auc'):.4f} | {bl_auc:.4f} | {gm(k,'auc')-bl_auc:+.4f} ({(gm(k,'auc')-bl_auc)/bl_auc*100:+.1f}%) | -| 攻击准确率 | {gm(k,'attack_accuracy'):.4f} | {gm('baseline','attack_accuracy'):.4f} | {gm(k,'attack_accuracy')-gm('baseline','attack_accuracy'):+.4f} | -| F1 | {gm(k,'f1'):.4f} | {gm('baseline','f1'):.4f} | {gm(k,'f1')-gm('baseline','f1'):+.4f} | -| TPR@5%FPR | {gm(k,'tpr_at_5fpr'):.4f} | {gm('baseline','tpr_at_5fpr'):.4f} | {gm(k,'tpr_at_5fpr')-gm('baseline','tpr_at_5fpr'):+.4f} | -| TPR@1%FPR | {gm(k,'tpr_at_1fpr'):.4f} | {gm('baseline','tpr_at_1fpr'):.4f} | {gm(k,'tpr_at_1fpr')-gm('baseline','tpr_at_1fpr'):+.4f} | -| Loss差距 | {gm(k,'loss_gap'):.4f} | {gm('baseline','loss_gap'):.4f} | {gm(k,'loss_gap')-gm('baseline','loss_gap'):+.4f} | -| 效用 | {gu(k):.1f}% | {bl_acc:.1f}% | {gu(k)-bl_acc:+.1f}% | - ---- -""" - # 🌟🌟🌟 这里就是修复缺失表格行的核心位置! 🌟🌟🌟 - for sigma in OP_SIGMAS: - k = f"perturbation_{sigma}" - detail_md += f"""\ -### 输出���动 OP(σ={sigma}) -| 指标 | 值 | vs基线 | 变化 | -|---|---|---|---| -| AUC | {gm(k,'auc'):.4f} | {bl_auc:.4f} | {gm(k,'auc')-bl_auc:+.4f} ({(gm(k,'auc')-bl_auc)/bl_auc*100:+.1f}%) | -| 攻击准确率 | {gm(k,'attack_accuracy'):.4f} | {gm('baseline','attack_accuracy'):.4f} | {gm(k,'attack_accuracy')-gm('baseline','attack_accuracy'):+.4f} | -| F1 | {gm(k,'f1'):.4f} | {gm('baseline','f1'):.4f} | {gm(k,'f1')-gm('baseline','f1'):+.4f} | -| TPR@5%FPR | {gm(k,'tpr_at_5fpr'):.4f} | {gm('baseline','tpr_at_5fpr'):.4f} | {gm(k,'tpr_at_5fpr')-gm('baseline','tpr_at_5fpr'):+.4f} | -| TPR@1%FPR | {gm(k,'tpr_at_1fpr'):.4f} | {gm('baseline','tpr_at_1fpr'):.4f} | {gm(k,'tpr_at_1fpr')-gm('baseline','tpr_at_1fpr'):+.4f} | -| Loss差距 | {gm(k,'loss_gap'):.4f} | {gm('baseline','loss_gap'):.4f} | {gm(k,'loss_gap')-gm('baseline','loss_gap'):+.4f} | -| 效用 | {bl_acc:.1f}% | {bl_acc:.1f}% | 0.0% (零损失) | - ---- -""" - gr.Markdown(detail_md) - - with gr.Tab("⚖️ 效用评估"): - gr.Markdown("## 📊 模型效用测试") - with gr.Row(): - with gr.Column(): gr.Plot(value=fig_acc_bar()) - with gr.Column(): gr.Plot(value=fig_tradeoff()) - gr.Markdown(f"> 标签平滑实现“双赢”:基线{bl_acc:.1f}% → LS(ε=0.2) {gu('smooth_eps_0.2'):.1f}%。输出扰动始终保持{bl_acc:.1f}%。") - gr.Markdown("### 🧪 在线抽题演示") - with gr.Row(): + gr.Plot(value=fig_d5_trend()) + with gr.Row(equal_height=True): + with gr.Column():gr.Plot(value=fig_acc()) + with gr.Column():gr.Plot(value=fig_d5_scatter()) + + # 完整数据表 + with gr.Accordion("\u5b8c\u6574\u6570\u636e\u8868",open=False): + gr.Markdown(build_table()) + + with gr.Tab("\u2696\ufe0f \u6548\u7528\u8bc4\u4f30"): + gr.Markdown("## \u6a21\u578b\u6548\u7528\u6d4b\u8bd5") + gr.Markdown("### \u5728\u7ebf\u62bd\u6837") + with gr.Row(equal_height=True): with gr.Column(scale=1): - gr.Markdown("#### ⚙️ 测试配置") - e_m = gr.Dropdown(choices=EVAL_CHOICES, value="基线模型", label="🤖 选择测试模型", interactive=True) - e_b = gr.Button("🎲 随机抽题测试", variant="primary") - with gr.Column(scale=2): - gr.Markdown("#### 📝 模型作答结果") - e_r = gr.HTML() - e_b.click(cb_eval, [e_m], [e_r]) + e_m=gr.Radio(EV_C,value="\u57fa\u7ebf\u6a21\u578b",label="\u6a21\u578b");e_b=gr.Button("\U0001f3b2 \u62bd\u9898",variant="primary") + with gr.Column(scale=2):e_r=gr.HTML() + e_b.click(cb_eval,[e_m],[e_r]) - with gr.Tab("📝 研究结论"): + with gr.Tab("\U0001f4dd \u7814\u7a76\u7ed3\u8bba"): gr.Markdown(f"""\ -## 核心研究发现 - ---- +## \u6838\u5fc3\u7814\u7a76\u53d1\u73b0 -### 结论一:教育大模型存在可量化的MIA风险 +### \u7ed3\u8bba1\uff1a\u6559\u80b2\u5927\u6a21\u578b\u5b58\u5728\u53ef\u91cf\u5316MIA\u98ce\u9669 +\u57fa\u7ebf AUC={BA:.4f}\uff0c\u653b\u51fb\u51c6\u786e\u7387{gm('baseline','attack_accuracy')*100:.1f}%\uff0cTPR@5%FPR={gm('baseline','tpr_at_5fpr'):.4f}\u3002 -基线模型的MIA攻击 AUC = **{bl_auc:.4f}**,显著高于随机猜测的0.5。攻击准确率达 **{gm('baseline','attack_accuracy')*100:.1f}%**,远超50%。在TPR@5%FPR={gm('baseline','tpr_at_5fpr'):.4f}的严格条件下,攻击者仍能识别近五分之一的训练成员。这证明教育大模型确实存在学生隐私泄露风险。 +### \u7ed3\u8bba2\uff1a\u6807\u7b7e\u5e73\u6ed1\u662f\u6709\u6548\u7684\u8bad\u7ec3\u671f\u9632\u5fa1 +AUC\u4ece{BA:.4f}\u964d\u81f3{gm('smooth_eps_0.2','auc'):.4f}\uff0c\u6548\u7528\u4ece{BAC:.1f}%\u5347\u81f3{gu('smooth_eps_0.2'):.1f}%\u3002**\u03b5\u22650.05\u65f6\u5b9e\u73b0\u53cc\u8d62\u3002** -### 结论二:标签平滑是有效的训练期防御 +### \u7ed3\u8bba3\uff1a\u8f93\u51fa\u6270\u52a8\u662f\u6709\u6548\u7684\u63a8\u7406\u671f\u9632\u5fa1 +AUC\u4ece{BA:.4f}\u964d\u81f3{gm('perturbation_0.03','auc'):.4f}\uff0c**\u6548\u7528\u96f6\u635f\u5931**\uff0c\u5373\u63d2\u5373\u7528\u3002 -| ε 参数 | AUC | AUC降幅 | 效用 | 效用变化 | -|---|---|---|---|---| -| ε=0.02 | {gm('smooth_eps_0.02','auc'):.4f} | {bl_auc-gm('smooth_eps_0.02','auc'):.4f} | {gu('smooth_eps_0.02'):.1f}% | {gu('smooth_eps_0.02')-bl_acc:+.1f}% | -| ε=0.05 | {gm('smooth_eps_0.05','auc'):.4f} | {bl_auc-gm('smooth_eps_0.05','auc'):.4f} | {gu('smooth_eps_0.05'):.1f}% | {gu('smooth_eps_0.05')-bl_acc:+.1f}% | -| ε=0.1 | {gm('smooth_eps_0.1','auc'):.4f} | {bl_auc-gm('smooth_eps_0.1','auc'):.4f} | {gu('smooth_eps_0.1'):.1f}% | {gu('smooth_eps_0.1')-bl_acc:+.1f}% | -| ε=0.2 | {gm('smooth_eps_0.2','auc'):.4f} | {bl_auc-gm('smooth_eps_0.2','auc'):.4f} | {gu('smooth_eps_0.2'):.1f}% | {gu('smooth_eps_0.2')-bl_acc:+.1f}% | - -**重要发现:ε≥0.05时,隐私保护和模型效用同时提升(“双赢”)。** 这是因为标签平滑的正则化效应防止了过拟合。 - -### 结论三:输出扰动是有效���推理期防御 - -| σ 参数 | AUC | AUC降幅 | 效用 | -|---|---|---|---| -| σ=0.005 | {gm('perturbation_0.005','auc'):.4f} | {bl_auc-gm('perturbation_0.005','auc'):.4f} | {bl_acc:.1f}% | -| σ=0.01 | {gm('perturbation_0.01','auc'):.4f} | {bl_auc-gm('perturbation_0.01','auc'):.4f} | {bl_acc:.1f}% | -| σ=0.015 | {gm('perturbation_0.015','auc'):.4f} | {bl_auc-gm('perturbation_0.015','auc'):.4f} | {bl_acc:.1f}% | -| σ=0.02 | {gm('perturbation_0.02','auc'):.4f} | {bl_auc-gm('perturbation_0.02','auc'):.4f} | {bl_acc:.1f}% | -| σ=0.025 | {gm('perturbation_0.025','auc'):.4f} | {bl_auc-gm('perturbation_0.025','auc'):.4f} | {bl_acc:.1f}% | -| σ=0.03 | {gm('perturbation_0.03','auc'):.4f} | {bl_auc-gm('perturbation_0.03','auc'):.4f} | {bl_acc:.1f}% | - -### 结论四:最佳实践建议 - -> **推荐组合方案: LS(ε=0.1) + OP(σ=0.02)** -> -> - 训练期:标签平滑从源头降低记忆,缩小Loss差距 -> - 推理期:输出扰动遮蔽残余信号,进一步降低AUC -> - 两者机制互补,可叠加使用 +### \u7ed3\u8bba4\uff1a\u63a8\u8350\u7ec4\u5408\u65b9\u6848 +> **LS(\u03b5=0.1) + OP(\u03c3=0.02)** \u2014 \u8bad\u7ec3\u671f\u7f29\u5c0fGap + \u63a8\u7406\u671f\u566a\u58f0\u906e\u853d\uff0c\u53cc\u91cd\u9632\u5fa1\u3002 +### \u7814\u7a76\u610f\u4e49 +1. \u9996\u6b21\u5728\u4e2d\u6587\u6559\u80b2\u573a\u666f\u4e0b\u7cfb\u7edf\u9a8c\u8bc1MIA\u98ce\u9669 +2. \u4e3a\u6559\u80b2AI\u90e8\u7f72\u63d0\u4f9b\u5177\u4f53\u9690\u79c1\u9632\u62a4\u65b9\u6848 +3. \u53d1\u73b0\u6807\u7b7e\u5e73\u6ed1\u7684\u201c\u9690\u79c1-\u6548\u7528\u53cc\u8d62\u201d\u6548\u5e94 +4. \u63d0\u4f9b\u4ece\u653b\u51fb\u68c0\u6d4b\u5230\u53cc\u91cd\u9632\u5fa1\u7684\u5b8c\u6574\u65b9\u6848 +5. \u6240\u6709\u4ee3\u7801/\u6570\u636e/\u7ed3\u679c\u5747\u53ef\u590d\u73b0 """) -demo.launch(theme=gr.themes.Soft(), css=CSS) \ No newline at end of file +demo.launch() \ No newline at end of file