xiaohy commited on
Commit
51e503d
·
verified ·
1 Parent(s): e6a1ec9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +201 -231
app.py CHANGED
@@ -91,7 +91,7 @@ for i in range(300):
91
  templates = [
92
  (f"小明有{a}个苹果,吃掉了{b}个,还剩多少个?", str(a - b)),
93
  (f"每组有{a}人,共{b}组,一共多少人?", str(a * b)),
94
- (f"图书馆有{a}书,借出{b}本后又买了{c}本,现在有多少本?", str(a - b + c)),
95
  (f"商店有{a}支铅笔,卖出{b}支,还剩多少支?", str(a - b)),
96
  (f"小红有{a}颗糖,小明给了她{b}颗,现在有多少颗?", str(a + b)),
97
  ]
@@ -138,14 +138,13 @@ def make_loss_distribution():
138
  for ax, (k, title) in zip(axes, items):
139
  m = full_results[k]['member_losses']; nm_l = full_results[k]['non_member_losses']
140
  bins = np.linspace(min(min(m), min(nm_l)), max(max(m), max(nm_l)), 30)
141
- ax.hist(m, bins=bins, alpha=0.55, color='#3b82f6', label='Member', density=True)
142
- ax.hist(nm_l, bins=bins, alpha=0.55, color='#ef4444', label='Non-Member', density=True)
143
- ax.set_title(title, fontsize=13, fontweight='bold', color="#0f172a")
144
- ax.set_xlabel('Loss', fontsize=11, color="#475569"); ax.set_ylabel('Density', fontsize=11, color="#475569")
145
- ax.legend(fontsize=10, frameon=True, facecolor="white", edgecolor="#e2e8f0"); ax.tick_params(labelsize=10, colors="#475569")
146
- ax.grid(True, linestyle=':', alpha=0.4, color="#94a3b8")
147
  ax.spines['top'].set_visible(False); ax.spines['right'].set_visible(False)
148
- ax.spines['bottom'].set_color('#cbd5e1'); ax.spines['left'].set_color('#cbd5e1')
149
  plt.tight_layout()
150
  return fig
151
 
@@ -163,102 +162,96 @@ def make_perturb_loss_distribution():
163
  nm_pert = nm_losses + np.random.normal(0, sigma, len(nm_losses))
164
  vals = np.concatenate([m_pert, nm_pert])
165
  bins = np.linspace(vals.min(), vals.max(), 30)
166
- ax.hist(m_pert, bins=bins, alpha=0.55, color='#3b82f6', label='Member+noise', density=True)
167
- ax.hist(nm_pert, bins=bins, alpha=0.55, color='#ef4444', label='Non-Member+noise', density=True)
168
  pk = 'perturbation_' + str(sigma)
169
  pauc = perturb_results.get(pk, {}).get('auc', 0)
170
- ax.set_title(f'OP(s={sigma})\nAUC={pauc:.4f}', fontsize=13, fontweight='bold', color="#0f172a")
171
- ax.set_xlabel('Loss', fontsize=11, color="#475569"); ax.set_ylabel('Density', fontsize=11, color="#475569")
172
- ax.legend(fontsize=9, frameon=True, facecolor="white", edgecolor="#e2e8f0"); ax.tick_params(labelsize=10, colors="#475569")
173
- ax.grid(True, linestyle=':', alpha=0.4, color="#94a3b8")
174
  ax.spines['top'].set_visible(False); ax.spines['right'].set_visible(False)
175
- ax.spines['bottom'].set_color('#cbd5e1'); ax.spines['left'].set_color('#cbd5e1')
176
  plt.tight_layout()
177
  return fig
178
 
179
 
180
  def make_auc_bar():
181
  methods, aucs, colors = [], [], []
182
- for k, n, c in [('baseline', 'Baseline', '#94a3b8'), ('smooth_0.02', 'LS(e=0.02)', '#3b82f6'),
183
- ('smooth_0.2', 'LS(e=0.2)', '#1d4ed8')]:
184
  if k in mia_results: methods.append(n); aucs.append(mia_results[k]['auc']); colors.append(c)
185
- for k, n, c in [('perturbation_0.01', 'OP(s=0.01)', '#34d399'), ('perturbation_0.015', 'OP(s=0.015)', '#10b981'),
186
- ('perturbation_0.02', 'OP(s=0.02)', '#059669')]:
187
  if k in perturb_results: methods.append(n); aucs.append(perturb_results[k]['auc']); colors.append(c)
188
  fig, ax = plt.subplots(figsize=(12, 6))
189
- bars = ax.bar(methods, aucs, color=colors, width=0.45, edgecolor='none')
190
  for bar, a in zip(bars, aucs):
191
- ax.text(bar.get_x()+bar.get_width()/2, bar.get_height()+0.002, f'{a:.4f}', ha='center', va='bottom', fontsize=11, fontweight='bold', color="#1e293b", family='monospace')
192
- ax.axhline(y=0.5, color='#ef4444', linestyle='--', linewidth=1.5, alpha=0.8, label='Random Guess (0.5)')
193
- ax.set_ylabel('MIA AUC', fontsize=12, color="#475569"); ax.set_ylim(0.48, max(aucs)+0.035)
194
- ax.legend(fontsize=10, frameon=False); ax.grid(axis='y', linestyle=':', alpha=0.4, color="#94a3b8")
195
  ax.spines['top'].set_visible(False); ax.spines['right'].set_visible(False)
196
- ax.spines['bottom'].set_color('#cbd5e1'); ax.spines['left'].set_color('#cbd5e1')
197
- plt.xticks(fontsize=11, color="#334155"); plt.tight_layout()
198
  return fig
199
 
200
 
201
  def make_tradeoff():
202
  fig, ax = plt.subplots(figsize=(10, 7))
203
  pts = []
204
- for k, n, mk, c, sz in [('baseline','Baseline','o','#94a3b8',220), ('smooth_0.02','LS(e=0.02)','s','#3b82f6',200), ('smooth_0.2','LS(e=0.2)','s','#1d4ed8',200)]:
205
  if k in mia_results and k in utility_results:
206
  pts.append({'n':n,'a':mia_results[k]['auc'],'c':utility_results[k]['accuracy'],'m':mk,'co':c,'s':sz})
207
  ba = utility_results.get('baseline',{}).get('accuracy',0.633)
208
- for k, n, mk, c, sz in [('perturbation_0.01','OP(s=0.01)','^','#34d399',200), ('perturbation_0.015','OP(s=0.015)','D','#10b981',160), ('perturbation_0.02','OP(s=0.02)','^','#059669',200)]:
209
  if k in perturb_results: pts.append({'n':n,'a':perturb_results[k]['auc'],'c':ba,'m':mk,'co':c,'s':sz})
210
  for p in pts:
211
- ax.scatter(p['c'], p['a'], label=p['n'], marker=p['m'], color=p['co'], s=p['s'], edgecolors='white', linewidth=1.5, zorder=5)
212
- ax.axhline(y=0.5, color='#cbd5e1', linestyle='--', alpha=0.8, label='Random Guess')
213
- ax.set_xlabel('Utility (Accuracy)', fontsize=12, fontweight='bold', color="#475569"); ax.set_ylabel('Privacy Risk (MIA AUC)', fontsize=12, fontweight='bold', color="#475569")
214
- ax.set_title('Privacy-Utility Trade-off', fontsize=14, fontweight='bold', color="#0f172a")
215
  aa=[p['c'] for p in pts]; ab=[p['a'] for p in pts]
216
  if aa and ab: ax.set_xlim(min(aa)-0.03,max(aa)+0.05); ax.set_ylim(min(min(ab),0.5)-0.02,max(ab)+0.025)
217
- ax.legend(loc='upper right', fontsize=10, frameon=True, facecolor="white", edgecolor="#e2e8f0"); ax.grid(True, linestyle=':', alpha=0.4, color="#94a3b8")
218
  ax.spines['top'].set_visible(False); ax.spines['right'].set_visible(False)
219
- ax.spines['bottom'].set_color('#cbd5e1'); ax.spines['left'].set_color('#cbd5e1')
220
- ax.tick_params(colors="#475569")
221
  plt.tight_layout(); return fig
222
 
223
 
224
  def make_accuracy_bar():
225
  names, accs, colors = [], [], []
226
- for k, n, c in [('baseline','Baseline','#94a3b8'), ('smooth_0.02','LS(e=0.02)','#3b82f6'), ('smooth_0.2','LS(e=0.2)','#1d4ed8')]:
227
  if k in utility_results: names.append(n); accs.append(utility_results[k]['accuracy']*100); colors.append(c)
228
  bp = utility_results.get('baseline',{}).get('accuracy',0)*100
229
- for k, n, c in [('perturbation_0.01','OP(s=0.01)','#34d399'), ('perturbation_0.015','OP(s=0.015)','#10b981'), ('perturbation_0.02','OP(s=0.02)','#059669')]:
230
  if k in perturb_results: names.append(n); accs.append(bp); colors.append(c)
231
  fig, ax = plt.subplots(figsize=(12, 6))
232
- bars = ax.bar(names, accs, color=colors, width=0.45, edgecolor='none')
233
  for bar, acc in zip(bars, accs):
234
- ax.text(bar.get_x()+bar.get_width()/2, bar.get_height()+1.0, f'{acc:.1f}%', ha='center', va='bottom', fontsize=11, fontweight='bold', color="#1e293b", family='monospace')
235
- ax.set_ylabel('Accuracy (%)', fontsize=12, color="#475569"); ax.set_ylim(0, 100)
236
- ax.grid(axis='y', linestyle=':', alpha=0.4, color="#94a3b8"); ax.spines['top'].set_visible(False); ax.spines['right'].set_visible(False)
237
- ax.spines['bottom'].set_color('#cbd5e1'); ax.spines['left'].set_color('#cbd5e1')
238
- plt.xticks(fontsize=11, color="#334155"); plt.tight_layout(); return fig
239
 
240
 
241
  def make_loss_gauge(loss_val, m_mean, nm_mean, threshold, m_std, nm_std):
242
  fig, ax = plt.subplots(figsize=(9, 3))
243
  x_min = min(m_mean-3*m_std, loss_val-0.01); x_max = max(nm_mean+3*nm_std, loss_val+0.01)
244
- ax.axvspan(x_min, threshold, alpha=0.08, color='#3b82f6')
245
- ax.axvspan(threshold, x_max, alpha=0.08, color='#ef4444')
246
- ax.axvline(x=threshold, color='#0f172a', linewidth=2, zorder=3)
247
- ax.text(threshold, 1.12, 'Threshold', ha='center', va='bottom', fontsize=10, fontweight='bold', color='#0f172a', transform=ax.get_xaxis_transform())
248
- ax.axvline(x=m_mean, color='#3b82f6', linewidth=1.5, linestyle='--', alpha=0.7)
249
- ax.text(m_mean, -0.3, f'Member Mean\n({m_mean:.4f})', ha='center', va='top', fontsize=8, color='#2563eb', transform=ax.get_xaxis_transform(), family='monospace')
250
- ax.axvline(x=nm_mean, color='#ef4444', linewidth=1.5, linestyle='--', alpha=0.7)
251
- ax.text(nm_mean, -0.3, f'Non-Mem Mean\n({nm_mean:.4f})', ha='center', va='top', fontsize=8, color='#dc2626', transform=ax.get_xaxis_transform(), family='monospace')
252
- mc = '#3b82f6' if loss_val < threshold else '#ef4444'
253
- ax.plot(loss_val, 0.5, marker='d', markersize=14, color=mc, zorder=5, transform=ax.get_xaxis_transform())
254
  ax.text(loss_val, 0.78, f'Loss={loss_val:.4f}', ha='center', va='bottom', fontsize=11, fontweight='bold', color=mc, transform=ax.get_xaxis_transform(),
255
- bbox=dict(boxstyle='square,pad=0.4', facecolor='white', edgecolor=mc, alpha=0.95), family='monospace')
256
- ax.text((x_min+threshold)/2, 0.5, 'Member Zone', ha='center', va='center', fontsize=11, color='#3b82f6', fontweight='bold', alpha=0.6, transform=ax.get_xaxis_transform())
257
- ax.text((threshold+x_max)/2, 0.5, 'Non-Member Zone', ha='center', va='center', fontsize=11, color='#ef4444', fontweight='bold', alpha=0.6, transform=ax.get_xaxis_transform())
258
  ax.set_xlim(x_min, x_max); ax.set_yticks([])
259
  for sp in ['top','right','left']: ax.spines[sp].set_visible(False)
260
- ax.spines['bottom'].set_color('#cbd5e1')
261
- ax.set_xlabel('Loss Value', fontsize=10, color="#475569"); plt.tight_layout(); return fig
262
 
263
 
264
  # ========================================
@@ -270,12 +263,12 @@ def show_random_sample(data_type):
270
  sample = data[np.random.randint(0, len(data))]
271
  meta = sample['metadata']
272
  task_map = {'calculation':'基础计算','word_problem':'应用题','concept':'概念问答','error_correction':'错题订正'}
273
- info_md = ("### 🔐 截获的隐私元数据 (Metadata)\n\n"
274
- "- **姓名 / Name**: `" + clean_text(str(meta.get('name',''))) + "`\n"
275
- "- **学号 / Student ID**: `" + clean_text(str(meta.get('student_id',''))) + "`\n"
276
- "- **班级 / Class**: `" + clean_text(str(meta.get('class',''))) + "`\n"
277
- "- **成绩 / Score**: `" + clean_text(str(meta.get('score',''))) + " 分`\n"
278
- "- **类型 / Task**: `" + task_map.get(sample.get('task_type',''),'') + "`\n")
279
  return info_md, clean_text(sample.get('question','')), clean_text(sample.get('answer',''))
280
 
281
 
@@ -334,18 +327,18 @@ def run_mia_demo(sample_index, data_type, model_choice):
334
  ac = "🔴" if is_member else "🟢"
335
 
336
  if attack_correct and pred_member and is_member:
337
- v = "⚠️ **攻击成功: 隐私泄露判定 (Privacy Compromised)**"; vd = "系统风控日志:模型对该样本过于熟悉(Loss低于阈值),攻击者成功判定其为训练集数据。"
338
  elif attack_correct:
339
- v = "✅ **拦截成功: 边界正确 (Boundary Secured)**"; vd = "系统风控日志:样本行为符合非成员特征,攻击者的判定与真实身份一致。"
340
  else:
341
- v = "🛡️ **防御生效: 攻击失误 (Attack Failed)**"; vd = "系统风控日志:攻击者的探测逻辑失效,模型成功隐藏了真实身份特征。"
342
-
343
- result_md = (f"### {v}\n\n> {vd}\n\n"
344
- "**TARGET MODEL**: `" + display_label + "` | **AUC METRIC**: `" + f"{model_auc:.4f}" + "`\n\n"
345
- "| INDICATOR | PREDICTION (ATTACKER) | GROUND TRUTH (SYSTEM) |\n|---|---|---|\n"
346
- "| STATUS | " + pc + " " + pl + " | " + ac + " " + al + " |\n"
347
- "| METRICS | Loss: `" + f"{loss:.4f}" + "` | Threshold: `" + f"{threshold:.4f}" + "` |\n")
348
- q_text = "**QUERY TRACKING ID [" + str(idx) + "] :**\n\n" + clean_text(sample.get('question',''))[:500]
349
  return q_text, gauge_fig, result_md
350
 
351
 
@@ -376,213 +369,190 @@ def run_eval_demo(eval_model):
376
  is_correct = q.get(model_key, q.get('baseline', False))
377
  icon = "✅" if is_correct else "❌"
378
  result_md = (
379
- "### 📊 在线效用验证测试\n\n"
380
- "**TARGET MODEL**: `" + eval_model + "` | **ACCURACY**: `" + f"{overall_acc:.1f}" + "%`\n\n"
381
- "| 字段 (FIELD) | 数据 (DATA) |\n|---|---|\n"
382
- "| TICKET ID | `#" + str(idx+1) + " / 300` |\n"
383
- "| TASK TYPE | `" + q.get('type_cn', q['type']) + "` |\n"
384
- "| PROMPT | " + q['question'] + " |\n"
385
- "| EXPECTED | " + q['answer'] + " |\n"
386
- "| RESULT | " + icon + " " + ("正确 (Correct)" if is_correct else "错误 (Incorrect)") + " |\n\n")
387
  if eval_model.startswith("输出扰动"):
388
- result_md += "> ℹ️ *SYSTEM NOTE: 输出扰动策略不改变底层权重结构故维持基线准确率水平*\n"
389
  return result_md
390
 
391
 
392
  # ========================================
393
- # Interface Design (SaaS Dashboard Style)
394
  # ========================================
395
 
396
  CSS = """
397
- /* Import tech fonts */
398
- @import url('https://fonts.googleapis.com/css2?family=Fira+Code:wght@400;500;600&family=Inter:wght@400;500;600;700&display=swap');
399
-
400
- /* Global Layout & Colors */
401
- body { background-color: #f1f5f9 !important; font-family: 'Inter', -apple-system, sans-serif !important; }
402
- .gradio-container { max-width: 1280px !important; margin: auto !important; }
403
-
404
- /* Dashboard Tabs Navigation */
405
- .tab-nav { border-bottom: 1px solid #cbd5e1 !important; margin-bottom: 24px !important; gap: 8px !important; padding: 0 16px !important; background: transparent !important; }
406
- .tab-nav button { font-family: 'Inter', sans-serif !important; font-size: 14px !important; padding: 12px 24px !important; font-weight: 600 !important; color: #64748b !important; border: none !important; background: transparent !important; border-radius: 6px 6px 0 0 !important; transition: all 0.2s ease !important; letter-spacing: 0.5px !important; }
407
- .tab-nav button:hover { color: #0f172a !important; background: rgba(15, 23, 42, 0.04) !important; }
408
- .tab-nav button.selected { color: #2563eb !important; border-bottom: 2px solid #2563eb !important; background: transparent !important; }
409
-
410
- /* Dashboard Card Panels */
411
- .tabitem { background: #ffffff !important; border-radius: 8px !important; box-shadow: 0 4px 6px -1px rgba(0,0,0,0.05), 0 2px 4px -1px rgba(0,0,0,0.03) !important; padding: 32px !important; border: 1px solid #e2e8f0 !important; }
412
-
413
- /* Typography & Headings */
414
- .prose h1 { font-family: 'Inter', sans-serif !important; font-size: 1.8rem !important; color: #0f172a !important; font-weight: 800 !important; text-align: left !important; border-bottom: 1px solid #e2e8f0 !important; padding-bottom: 16px !important; margin-bottom: 24px !important; }
415
- .prose h2 { display: flex !important; align-items: center !important; font-size: 1.25rem !important; color: #0f172a !important; margin-top: 2rem !important; margin-bottom: 1rem !important; padding-bottom: 0 !important; border-bottom: none !important; font-weight: 700 !important; text-transform: uppercase !important; letter-spacing: 0.05em !important; }
416
- .prose h2::before { content: ''; display: inline-block !important; width: 4px !important; height: 18px !important; background: #2563eb !important; margin-right: 12px !important; border-radius: 2px !important; }
417
- .prose h3 { font-size: 1.1rem !important; color: #334155 !important; font-weight: 600 !important; margin-top: 1.5rem !important; }
418
-
419
- /* Code & Data Highlights */
420
- .prose code { font-family: 'Fira Code', monospace !important; font-size: 0.85em !important; background: #f1f5f9 !important; color: #0f172a !important; padding: 2px 6px !important; border-radius: 4px !important; border: 1px solid #e2e8f0 !important; }
421
-
422
- /* Modern Data Tables */
423
- .prose table { width: 100% !important; border-collapse: separate !important; border-spacing: 0 !important; margin: 1.5rem 0 !important; border-radius: 6px !important; overflow: hidden !important; border: 1px solid #e2e8f0 !important; font-family: 'Fira Code', monospace !important; font-size: 0.85rem !important; }
424
- .prose th { background: #f8fafc !important; color: #475569 !important; font-weight: 600 !important; padding: 12px 16px !important; text-align: left !important; border-bottom: 1px solid #e2e8f0 !important; text-transform: uppercase !important; letter-spacing: 0.05em !important; }
425
- .prose tr:nth-child(even) td { background: #ffffff !important; }
426
- .prose td { padding: 12px 16px !important; color: #1e293b !important; border-bottom: 1px solid #f1f5f9 !important; transition: background-color 0.2s !important; }
427
- .prose tr:last-child td { border-bottom: none !important; }
428
- .prose tr:hover td { background-color: #f8fafc !important; }
429
-
430
- /* Alert / Blockquote Panels */
431
- .prose blockquote { border-left: 4px solid #3b82f6 !important; background: #eff6ff !important; padding: 16px 20px !important; border-radius: 0 6px 6px 0 !important; color: #1d4ed8 !important; font-weight: 500 !important; font-size: 0.95rem !important; margin: 1.5rem 0 !important; }
432
-
433
- /* Interactive Buttons */
434
- button.primary { background: #0f172a !important; color: #ffffff !important; border: none !important; border-radius: 6px !important; font-family: 'Inter', sans-serif !important; font-weight: 600 !important; font-size: 14px !important; padding: 10px 20px !important; text-transform: uppercase !important; letter-spacing: 0.05em !important; transition: all 0.2s ease !important; box-shadow: 0 4px 6px -1px rgba(15, 23, 42, 0.1) !important; }
435
- button.primary:hover { background: #1e293b !important; transform: translateY(-1px) !important; box-shadow: 0 6px 10px -1px rgba(15, 23, 42, 0.2) !important; }
436
-
437
- /* Hide Gradio Footer */
438
  footer { display: none !important; }
439
  """
440
 
441
- # 使用 Base 主题,这剥离了原本 Soft 主题那种廉价的 AI 聊天框感觉
442
- tech_theme = gr.themes.Base(
443
- primary_hue="blue",
444
- secondary_hue="slate",
445
- neutral_hue="slate",
446
- font=[gr.themes.GoogleFont("Inter"), "ui-sans-serif", "system-ui", "sans-serif"]
447
- )
448
 
449
- with gr.Blocks(title="AI Privacy Dashboard", theme=tech_theme, css=CSS) as demo:
450
 
451
- gr.Markdown("# MODEL PRIVACY AUDIT DASHBOARD\n\n> 探究大语言模型的隐私泄露特征 (MIA),定量评估防御策略 (Label Smoothing & Output Perturbation) 的干预效能。\n")
452
-
453
- with gr.Tab("SYSTEM OVERVIEW (概览)"):
454
  gr.Markdown(
455
- "## 1. BACKGROUND & METRICS\n\n大语言模型在微调链路中存在敏感数据烙印。**成员推理攻击 (Membership Inference Attack, MIA)** 旨在通过探测模型置信度分布,反推数据归属,构成严重的数据合规威胁。\n\n---\n\n"
456
- "## 2. PIPELINE ARCHITECTURE\n\n"
457
- "| PHASE (阶段) | TARGET (目标) | METHODOLOGY (方法) |\n|------|------|------|\n"
458
- "| DATA PREP | 2000 Instances | 生成结构化对话数据注入合规高危字段 (Metadata) |\n"
459
- "| BASELINE | Qwen2.5-Math-1.5B | 基于 LoRA 标准微调指令学习 (干预) |\n"
460
- "| DEFENSE I | Label Smoothing | 对比训练平滑系数 `e=0.02``e=0.2` |\n"
461
- "| DEFENSE II | Output Perturbation| 在推理端注入方差为 `s` 的高斯噪声 (Gaussian Noise) |\n"
462
- "| EVALUATION | 300 Math Queries | 并行测试 3 组基座结构与 3 组扰动参数的模型效用 |\n"
463
- "| ANALYSIS | Trade-off Matrix | 绘制 `Privacy-Utility` 多维权衡边界 |\n\n---\n\n"
464
- "## 3. EXPERIMENT CONFIG\n\n| KEY | VALUE |\n|------|-----|\n"
465
- "| BACKBONE | `" + model_name_str + "` |\n"
466
- "| FINE-TUNING | `LoRA (r=8, alpha=16)` |\n| EPOCHS | `10` |\n"
467
- "| DATA SIZE | `" + data_size_str + "` |\n| ACTIVE MODELS | `3` |\n")
468
-
469
- with gr.Tab("DATA EXAMINER (数据分析)"):
470
- gr.Markdown("## DATASET TOPOLOGY\n\n"
471
- "- **MEMBER POOL** (`1000` instances): 模型训练特征库为 MIA 攻击正样本。\n"
472
- "- **NON-MEMBER POOL** (`1000` instances): 对照隔离区,为 MIA 攻击负样本。\n"
473
- "- *Note: 数据享有态 Schema,攻击者处于完全黑盒环境 (Black-box)。*\n\n"
474
- "### TASK DISTRIBUTION MATRIX\n\n"
475
- "| CATEGORY | COUNT | RATIO |\n|------|------|------|\n"
476
- "| 基础计算 (Calculation) | `800` | `40.0%` |\n| 应用题 (Word Problem) | `600` | `30.0%` |\n| 概念问答 (Concept QA) | `400` | `20.0%` |\n| 错题订正 (Error Correction) | `200` | `10.0%` |\n")
477
  with gr.Row():
478
  with gr.Column():
479
- data_sel = gr.Radio(["成员数据(训练集)","非成员数据(测试集)"], value="成员数据(训练集)", label="SELECT DATA POOL")
480
- sample_btn = gr.Button("FETCH RANDOM SAMPLE", variant="primary")
481
  sample_info = gr.Markdown()
482
  with gr.Column():
483
- sample_q = gr.Textbox(label="PROMPT (INPUT)", lines=5, interactive=False)
484
- sample_a = gr.Textbox(label="GROUND TRUTH (OUTPUT)", lines=5, interactive=False)
485
  sample_btn.click(show_random_sample, [data_sel], [sample_info, sample_q, sample_a])
486
 
487
- with gr.Tab("MIA ATTACK WORKSPACE (攻击工作台)"):
488
- gr.Markdown("## LAUNCH INFERENCE ATTACK\n\n配置攻击目标实体与数据源,系统将执行 Loss 计算并映射攻击边界。\n")
489
  with gr.Row():
490
  with gr.Column():
491
  atk_model = gr.Radio(["基线模型 (Baseline)","标签平滑模型 (e=0.02)","标签平滑模型 (e=0.2)",
492
- "输出扰动 (s=0.01)","输出扰动 (s=0.015)","输出扰动 (s=0.02)"], value="基线模型 (Baseline)", label="TARGET MODEL")
493
- atk_type = gr.Radio(["成员数据(训练集)","非成员数据(测试集)"], value="成员数据(训练集)", label="DATA SOURCE")
494
- atk_idx = gr.Slider(0, 999, step=1, value=0, label="SAMPLE POINTER (ID)")
495
- atk_btn = gr.Button("EXECUTE ATTACK SCRIPT", variant="primary", size="lg")
496
  atk_question = gr.Markdown()
497
  with gr.Column():
498
- gr.Markdown("**ATTACK TELEMETRY (实时雷达)**")
499
- atk_gauge = gr.Plot(label="Loss Distribution Radar")
500
  atk_result = gr.Markdown()
501
  atk_btn.click(run_mia_demo, [atk_idx, atk_type, atk_model], [atk_question, atk_gauge, atk_result])
502
 
503
- with gr.Tab("DEFENSE MATRIX (防御对比)"):
504
- gr.Markdown("## MITIGATION STRATEGIES\n\n"
505
- "| STRATEGY | STAGE | MECHANISM | PRO (优势) | CON (局限) |\n|------|------|------|---------|--------|\n"
506
- "| Label Smoothing | `Training` | 截断过度记忆特征 | AUC 降至 `" + f"{s002_auc:.4f}" + "` | 训练成本增加 |\n"
507
- "| Output Perturb | `Inference` | 掩盖输出分布置信度 | AUC 降至 `" + f"{op002_auc:.4f}" + "` (零损耗) | 仅干扰统计推断 |\n")
508
- gr.Markdown("### 1. AUC DEGRADATION COMPARISON"); gr.Plot(value=make_auc_bar())
509
- gr.Markdown("### 2. LOSS DISTRIBUTION SHIFT (MODEL WEIGHTS)"); gr.Plot(value=make_loss_distribution())
510
- gr.Markdown("### 3. LOSS DISTRIBUTION SHIFT (OUTPUT NOISE)"); gr.Plot(value=make_perturb_loss_distribution())
511
- tbl = "### BENCHMARK RESULTS\n\n| STRATEGY | STAGE | AUC | ACCURACY | AUC DELTA |\n|------|------|-----|--------|--------|\n"
512
- for k, n, cat in [('baseline','Baseline','N/A'),('smooth_0.02','LS(e=0.02)','Training'),('smooth_0.2','LS(e=0.2)','Training')]:
513
  if k in mia_results:
514
  a=mia_results[k]['auc']; acc=utility_results.get(k,{}).get('accuracy',0)*100
515
- d = "`--`" if k=='baseline' else f"`{a-bl_auc:+.4f}`"
516
- tbl += "| `"+n+"` | `"+cat+"` | `"+f"{a:.4f}"+"` | `"+f"{acc:.1f}"+"%` | "+d+" |\n"
517
  for k, n in [('perturbation_0.01','OP(s=0.01)'),('perturbation_0.015','OP(s=0.015)'),('perturbation_0.02','OP(s=0.02)')]:
518
  if k in perturb_results:
519
  a=perturb_results[k]['auc']
520
- tbl += "| `"+n+"` | `Inference` | `"+f"{a:.4f}"+"` | `"+f"{bl_acc:.1f}"+"%` (Locked) | `"+f"{a-bl_auc:+.4f}`"+" |\n"
521
  gr.Markdown(tbl)
522
 
523
- with gr.Tab("PROTOCOL DOCS (机理分析)"):
524
  gr.Markdown(
525
- "## 1. LABEL SMOOTHING (训练期平滑)\n\n"
526
- "转换硬标签 (One-hot) 为软标签结构从梯度层面抑制模型过拟合定势。\n\n"
527
- "**ALGORITHM**: `y_smooth = (1 - e) * y_onehot + e / V`\n\n"
528
- "| CONFIG | AUC | ACCURACY | BEHAVIOR |\n|------|-----|--------|------|\n"
529
- "| `Baseline` | `" + f"{bl_auc:.4f}" + "` | `" + f"{bl_acc:.1f}" + "%` | 高危裸奔 |\n"
530
- "| `e=0.02` | `" + f"{s002_auc:.4f}" + "` | `" + f"{s002_acc:.1f}" + "%` | 正则化增强 |\n"
531
- "| `e=0.2` | `" + f"{s02_auc:.4f}" + "` | `" + f"{s02_acc:.1f}" + "%` | 防御过载 |\n\n---\n\n"
532
- "## 2. OUTPUT PERTURBATION (推理期加噪)\n\n"
533
- "剥离模型底座参数修改需求,在输出端挂载置信度混淆代\n\n"
534
- "**ALGORITHM**: `L_perturbed = L_original + N(0, s^2)`\n\n"
535
- "| CONFIG | AUC | DELTA | ACCURACY |\n|------|-----|---------|--------|\n"
536
- "| `Baseline` | `" + f"{bl_auc:.4f}" + "` | `--` | `" + f"{bl_acc:.1f}" + "%` |\n"
537
- "| `s=0.01` | `" + f"{op001_auc:.4f}" + "` | `" + f"{bl_auc-op001_auc:.4f}" + "` | `" + f"{bl_acc:.1f}" + "%` |\n"
538
- "| `s=0.015` | `" + f"{op0015_auc:.4f}" + "` | `" + f"{bl_auc-op0015_auc:.4f}" + "` | `" + f"{bl_acc:.1f}" + "%` |\n"
539
- "| `s=0.02` | `" + f"{op002_auc:.4f}" + "` | `" + f"{bl_auc-op002_auc:.4f}" + "` | `" + f"{bl_acc:.1f}" + "%` |\n\n---\n\n"
540
- "## 3. COMPARATIVE ARCHITECTURE\n\n| METRIC | LABEL SMOOTHING | OUTPUT PERTURBATION |\n|------|---------|----------|\n"
541
- "| ENTRY POINT | `Training Time` | `Inference Time` |\n| RETRAIN REQ | `Yes` | `No` |\n| UTILITY IMPACT | `Variable (can improve)` | `Zero Loss` |\n| DEFENSE CORE | `Suppress Memorization` | `Statistical Obfuscation` |\n")
542
-
543
- with gr.Tab("UTILITY EVAL (效用监控)"):
544
- gr.Markdown("## UTILITY BENCHMARK\n\n> 从 300 条基准测例集中进行流式抽样,监控模型真实推理能力。\n")
 
545
  with gr.Row():
546
  with gr.Column():
547
- gr.Markdown("### ACCURACY RETENTION"); gr.Plot(value=make_accuracy_bar())
548
  with gr.Column():
549
- gr.Markdown("### PRIVACY-UTILITY TRADE-OFF"); gr.Plot(value=make_tradeoff())
550
- gr.Markdown("### LIVE UTILITY INFERENCE")
551
  with gr.Row():
552
  with gr.Column():
553
  eval_model = gr.Radio(["基线模型 (Baseline)","标签平滑模型 (e=0.02)","标签平滑模型 (e=0.2)",
554
- "输出扰动 (s=0.01)","输出扰动 (s=0.015)","输出扰动 (s=0.02)"], value="基线模型 (Baseline)", label="SELECT PIPELINE")
555
- eval_btn = gr.Button("RUN VALIDATION TASK", variant="primary")
556
  with gr.Column():
557
  eval_result = gr.Markdown()
558
  eval_btn.click(run_eval_demo, [eval_model], [eval_result])
559
 
560
- with gr.Tab("REPORTS & VIZ (报告大盘)"):
561
- gr.Markdown("## HIGH-RES EXPORT GRAPHICS")
562
- for fn, cap in [("fig1_loss_distribution_comparison.png","Fig 1: Base Loss Distribution Analysis"),
563
- ("fig2_privacy_utility_tradeoff_fixed.png","Fig 2: Utility-Privacy Multi-dimensional Border"),
564
- ("fig3_defense_comparison_bar.png","Fig 3: Aggregate Defense Efficacy")]:
565
  p = os.path.join(BASE_DIR,"figures",fn)
566
  if os.path.exists(p):
567
  gr.Markdown("### "+cap); gr.Image(value=p, show_label=False, height=450); gr.Markdown("---")
568
 
569
- with gr.Tab("FINAL VERDICT (结论)"):
570
  gr.Markdown(
571
- "## EXECUTIVE SUMMARY\n\n---\n\n"
572
- "### 1. VULNERABILITY DETECTED\n\n"
573
- "Baseline Pipeline AUC = **`" + f"{bl_auc:.4f}" + "`**. 成员平均 Loss (`" + f"{bl_m_mean:.4f}" + "`) 显著低于对照组 (`" + f"{bl_nm_mean:.4f}" + "`)。教育大模型底座微调后遗留强烈置信度印记。\n\n---\n\n"
574
- "### 2. LABEL SMOOTHING (EVAL)\n\n"
575
- "| CONFIG | AUC | ACCURACY | NOTE |\n|------|-----|--------|------|\n"
576
- "| `Baseline` | `" + f"{bl_auc:.4f}" + "` | `" + f"{bl_acc:.1f}" + "%` | - |\n"
577
- "| `e=0.02` | `" + f"{s002_auc:.4f}" + "` | `" + f"{s002_acc:.1f}" + "%` | 正则化释放泛化红利 |\n"
578
- "| `e=0.2` | `" + f"{s02_auc:.4f}" + "` | `" + f"{s02_acc:.1f}" + "%` | 防御区间 |\n\n"
579
- "**结论**: 微量标签滑表现出卓越的正向权衡。\n\n---\n\n"
580
- "### 3. OUTPUT PERTURBATION (EVAL)\n\n"
581
- "| CONFIG | AUC | ACCURACY |\n|------|-----|--------|\n"
582
- "| `s=0.01` | `" + f"{op001_auc:.4f}" + "` | `" + f"{bl_acc:.1f}" + "%` |\n"
583
- "| `s=0.02` | `" + f"{op002_auc:.4f}" + "` | `" + f"{bl_acc:.1f}" + "%` |\n\n"
584
- "**结论**: 零效用磨损,部署环境的轻量级首选方案。\n")
585
-
586
- gr.Markdown("---\n\n<center><code style='color:#94a3b8;background:transparent;border:none;'>SYSTEM BUILD: 2026.03 | ARCHITECTURE: QWEN2.5-MATH-1.5B | STATUS: OPERATIONAL</code></center>\n")
 
 
 
 
 
 
 
 
 
 
587
 
588
  demo.launch()
 
91
  templates = [
92
  (f"小明有{a}个苹果,吃掉了{b}个,还剩多少个?", str(a - b)),
93
  (f"每组有{a}人,共{b}组,一共多少人?", str(a * b)),
94
+ (f"图书馆有{a}本书,借出{b}本后又买了{c}本,现在有多少本?", str(a - b + c)),
95
  (f"商店有{a}支铅笔,卖出{b}支,还剩多少支?", str(a - b)),
96
  (f"小红有{a}颗糖,小明给了她{b}颗,现在有多少颗?", str(a + b)),
97
  ]
 
138
  for ax, (k, title) in zip(axes, items):
139
  m = full_results[k]['member_losses']; nm_l = full_results[k]['non_member_losses']
140
  bins = np.linspace(min(min(m), min(nm_l)), max(max(m), max(nm_l)), 30)
141
+ ax.hist(m, bins=bins, alpha=0.55, color='#5B8FF9', label='Member', density=True)
142
+ ax.hist(nm_l, bins=bins, alpha=0.55, color='#E86452', label='Non-Member', density=True)
143
+ ax.set_title(title, fontsize=13, fontweight='bold')
144
+ ax.set_xlabel('Loss', fontsize=11); ax.set_ylabel('Density', fontsize=11)
145
+ ax.legend(fontsize=10); ax.tick_params(labelsize=10)
146
+ ax.grid(True, linestyle='--', alpha=0.3)
147
  ax.spines['top'].set_visible(False); ax.spines['right'].set_visible(False)
 
148
  plt.tight_layout()
149
  return fig
150
 
 
162
  nm_pert = nm_losses + np.random.normal(0, sigma, len(nm_losses))
163
  vals = np.concatenate([m_pert, nm_pert])
164
  bins = np.linspace(vals.min(), vals.max(), 30)
165
+ ax.hist(m_pert, bins=bins, alpha=0.55, color='#5B8FF9', label='Member+noise', density=True)
166
+ ax.hist(nm_pert, bins=bins, alpha=0.55, color='#E86452', label='Non-Member+noise', density=True)
167
  pk = 'perturbation_' + str(sigma)
168
  pauc = perturb_results.get(pk, {}).get('auc', 0)
169
+ ax.set_title(f'OP(s={sigma})\nAUC={pauc:.4f}', fontsize=13, fontweight='bold')
170
+ ax.set_xlabel('Loss', fontsize=11); ax.set_ylabel('Density', fontsize=11)
171
+ ax.legend(fontsize=9); ax.tick_params(labelsize=10)
172
+ ax.grid(True, linestyle='--', alpha=0.3)
173
  ax.spines['top'].set_visible(False); ax.spines['right'].set_visible(False)
 
174
  plt.tight_layout()
175
  return fig
176
 
177
 
178
  def make_auc_bar():
179
  methods, aucs, colors = [], [], []
180
+ for k, n, c in [('baseline', 'Baseline', '#8C8C8C'), ('smooth_0.02', 'LS(e=0.02)', '#5B8FF9'),
181
+ ('smooth_0.2', 'LS(e=0.2)', '#3D76DD')]:
182
  if k in mia_results: methods.append(n); aucs.append(mia_results[k]['auc']); colors.append(c)
183
+ for k, n, c in [('perturbation_0.01', 'OP(s=0.01)', '#5AD8A6'), ('perturbation_0.015', 'OP(s=0.015)', '#2EAD78'),
184
+ ('perturbation_0.02', 'OP(s=0.02)', '#1A7F5A')]:
185
  if k in perturb_results: methods.append(n); aucs.append(perturb_results[k]['auc']); colors.append(c)
186
  fig, ax = plt.subplots(figsize=(12, 6))
187
+ bars = ax.bar(methods, aucs, color=colors, width=0.5, edgecolor='white', linewidth=1.5)
188
  for bar, a in zip(bars, aucs):
189
+ ax.text(bar.get_x()+bar.get_width()/2, bar.get_height()+0.002, f'{a:.4f}', ha='center', va='bottom', fontsize=11, fontweight='bold')
190
+ ax.axhline(y=0.5, color='#E86452', linestyle='--', linewidth=1.5, alpha=0.6, label='Random Guess (0.5)')
191
+ ax.set_ylabel('MIA AUC', fontsize=12); ax.set_ylim(0.48, max(aucs)+0.035)
192
+ ax.legend(fontsize=10); ax.grid(axis='y', linestyle='--', alpha=0.3)
193
  ax.spines['top'].set_visible(False); ax.spines['right'].set_visible(False)
194
+ plt.xticks(fontsize=11); plt.tight_layout()
 
195
  return fig
196
 
197
 
198
  def make_tradeoff():
199
  fig, ax = plt.subplots(figsize=(10, 7))
200
  pts = []
201
+ for k, n, mk, c, sz in [('baseline','Baseline','o','#8C8C8C',220), ('smooth_0.02','LS(e=0.02)','s','#5B8FF9',200), ('smooth_0.2','LS(e=0.2)','s','#3D76DD',200)]:
202
  if k in mia_results and k in utility_results:
203
  pts.append({'n':n,'a':mia_results[k]['auc'],'c':utility_results[k]['accuracy'],'m':mk,'co':c,'s':sz})
204
  ba = utility_results.get('baseline',{}).get('accuracy',0.633)
205
+ for k, n, mk, c, sz in [('perturbation_0.01','OP(s=0.01)','^','#5AD8A6',200), ('perturbation_0.015','OP(s=0.015)','D','#2EAD78',160), ('perturbation_0.02','OP(s=0.02)','^','#1A7F5A',200)]:
206
  if k in perturb_results: pts.append({'n':n,'a':perturb_results[k]['auc'],'c':ba,'m':mk,'co':c,'s':sz})
207
  for p in pts:
208
+ ax.scatter(p['c'], p['a'], label=p['n'], marker=p['m'], color=p['co'], s=p['s'], edgecolors='white', linewidth=2, zorder=5)
209
+ ax.axhline(y=0.5, color='#BFBFBF', linestyle='--', alpha=0.8, label='Random Guess')
210
+ ax.set_xlabel('Accuracy', fontsize=12, fontweight='bold'); ax.set_ylabel('MIA AUC', fontsize=12, fontweight='bold')
211
+ ax.set_title('Privacy-Utility Trade-off', fontsize=14, fontweight='bold')
212
  aa=[p['c'] for p in pts]; ab=[p['a'] for p in pts]
213
  if aa and ab: ax.set_xlim(min(aa)-0.03,max(aa)+0.05); ax.set_ylim(min(min(ab),0.5)-0.02,max(ab)+0.025)
214
+ ax.legend(loc='upper right', fontsize=9); ax.grid(True, alpha=0.2)
215
  ax.spines['top'].set_visible(False); ax.spines['right'].set_visible(False)
 
 
216
  plt.tight_layout(); return fig
217
 
218
 
219
  def make_accuracy_bar():
220
  names, accs, colors = [], [], []
221
+ for k, n, c in [('baseline','Baseline','#8C8C8C'), ('smooth_0.02','LS(e=0.02)','#5B8FF9'), ('smooth_0.2','LS(e=0.2)','#3D76DD')]:
222
  if k in utility_results: names.append(n); accs.append(utility_results[k]['accuracy']*100); colors.append(c)
223
  bp = utility_results.get('baseline',{}).get('accuracy',0)*100
224
+ for k, n, c in [('perturbation_0.01','OP(s=0.01)','#5AD8A6'), ('perturbation_0.015','OP(s=0.015)','#2EAD78'), ('perturbation_0.02','OP(s=0.02)','#1A7F5A')]:
225
  if k in perturb_results: names.append(n); accs.append(bp); colors.append(c)
226
  fig, ax = plt.subplots(figsize=(12, 6))
227
+ bars = ax.bar(names, accs, color=colors, width=0.5, edgecolor='white', linewidth=1.5)
228
  for bar, acc in zip(bars, accs):
229
+ ax.text(bar.get_x()+bar.get_width()/2, bar.get_height()+0.5, f'{acc:.1f}%', ha='center', va='bottom', fontsize=11, fontweight='bold')
230
+ ax.set_ylabel('Accuracy (%)', fontsize=12); ax.set_ylim(0, 100)
231
+ ax.grid(axis='y', alpha=0.3); ax.spines['top'].set_visible(False); ax.spines['right'].set_visible(False)
232
+ plt.xticks(fontsize=11); plt.tight_layout(); return fig
 
233
 
234
 
235
  def make_loss_gauge(loss_val, m_mean, nm_mean, threshold, m_std, nm_std):
236
  fig, ax = plt.subplots(figsize=(9, 3))
237
  x_min = min(m_mean-3*m_std, loss_val-0.01); x_max = max(nm_mean+3*nm_std, loss_val+0.01)
238
+ ax.axvspan(x_min, threshold, alpha=0.12, color='#5B8FF9')
239
+ ax.axvspan(threshold, x_max, alpha=0.12, color='#E86452')
240
+ ax.axvline(x=threshold, color='#434343', linewidth=2, zorder=3)
241
+ ax.text(threshold, 1.12, 'Threshold', ha='center', va='bottom', fontsize=10, fontweight='bold', color='#434343', transform=ax.get_xaxis_transform())
242
+ ax.axvline(x=m_mean, color='#5B8FF9', linewidth=1.2, linestyle='--', alpha=0.6)
243
+ ax.text(m_mean, -0.3, f'Member\n({m_mean:.4f})', ha='center', va='top', fontsize=8, color='#5B8FF9', transform=ax.get_xaxis_transform())
244
+ ax.axvline(x=nm_mean, color='#E86452', linewidth=1.2, linestyle='--', alpha=0.6)
245
+ ax.text(nm_mean, -0.3, f'Non-Mem\n({nm_mean:.4f})', ha='center', va='top', fontsize=8, color='#E86452', transform=ax.get_xaxis_transform())
246
+ mc = '#5B8FF9' if loss_val < threshold else '#E86452'
247
+ ax.plot(loss_val, 0.5, marker='v', markersize=16, color=mc, zorder=5, transform=ax.get_xaxis_transform())
248
  ax.text(loss_val, 0.78, f'Loss={loss_val:.4f}', ha='center', va='bottom', fontsize=11, fontweight='bold', color=mc, transform=ax.get_xaxis_transform(),
249
+ bbox=dict(boxstyle='round,pad=0.3', facecolor='white', edgecolor=mc, alpha=0.95))
250
+ ax.text((x_min+threshold)/2, 0.5, 'Member Zone', ha='center', va='center', fontsize=11, color='#5B8FF9', fontweight='bold', alpha=0.5, transform=ax.get_xaxis_transform())
251
+ ax.text((threshold+x_max)/2, 0.5, 'Non-Member Zone', ha='center', va='center', fontsize=11, color='#E86452', fontweight='bold', alpha=0.5, transform=ax.get_xaxis_transform())
252
  ax.set_xlim(x_min, x_max); ax.set_yticks([])
253
  for sp in ['top','right','left']: ax.spines[sp].set_visible(False)
254
+ ax.set_xlabel('Loss Value', fontsize=10); plt.tight_layout(); return fig
 
255
 
256
 
257
  # ========================================
 
263
  sample = data[np.random.randint(0, len(data))]
264
  meta = sample['metadata']
265
  task_map = {'calculation':'基础计算','word_problem':'应用题','concept':'概念问答','error_correction':'错题订正'}
266
+ info_md = ("**截获的隐私元数据**\n\n"
267
+ "- **姓名**: " + clean_text(str(meta.get('name',''))) + "\n"
268
+ "- **学号**: " + clean_text(str(meta.get('student_id',''))) + "\n"
269
+ "- **班级**: " + clean_text(str(meta.get('class',''))) + "\n"
270
+ "- **成绩**: " + clean_text(str(meta.get('score',''))) + " 分\n"
271
+ "- **类型**: " + task_map.get(sample.get('task_type',''),'') + "\n")
272
  return info_md, clean_text(sample.get('question','')), clean_text(sample.get('answer',''))
273
 
274
 
 
327
  ac = "🔴" if is_member else "🟢"
328
 
329
  if attack_correct and pred_member and is_member:
330
+ v = "⚠️ **攻击成功: 发生了隐私泄露**"; vd = "模型对该样本过于熟悉(Loss低于阈值),攻击者成功判定其为训练集数据。"
331
  elif attack_correct:
332
+ v = "✅ **判定正确**"; vd = "攻击者的判定与真实身份一致。"
333
  else:
334
+ v = "❌ **攻击失误**"; vd = "攻击者的判定与真实身份不符。"
335
+
336
+ result_md = (v + "\n\n" + vd + "\n\n"
337
+ "**当前攻击模型**: " + display_label + " (AUC=" + f"{model_auc:.4f}" + ")\n\n"
338
+ "| | 攻击者计算得出 | 系统真实身份 |\n|---|---|---|\n"
339
+ "| 判定 | " + pc + " " + pl + " | " + ac + " " + al + " |\n"
340
+ "| Loss | " + f"{loss:.4f}" + " | Threshold: " + f"{threshold:.4f}" + " |\n")
341
+ q_text = "**样本追踪号 [" + str(idx) + "] :**\n\n" + clean_text(sample.get('question',''))[:500]
342
  return q_text, gauge_fig, result_md
343
 
344
 
 
369
  is_correct = q.get(model_key, q.get('baseline', False))
370
  icon = "✅" if is_correct else "❌"
371
  result_md = (
372
+ "### 测试结果\n\n"
373
+ "**模型**: " + eval_model + " (总体准确率: " + f"{overall_acc:.1f}" + "%)\n\n"
374
+ "| 项目 | 内容 |\n|---|---|\n"
375
+ "| 题目编号 | #" + str(idx+1) + " / 300 |\n"
376
+ "| 题目类型 | " + q.get('type_cn', q['type']) + " |\n"
377
+ "| 题目 | " + q['question'] + " |\n"
378
+ "| 正确答案 | " + q['answer'] + " |\n"
379
+ "| 模型判定 | " + icon + " " + ("正确" if is_correct else "错误") + " |\n\n")
380
  if eval_model.startswith("输出扰动"):
381
+ result_md += "> 输出扰动不改变模型参数,因此准确率与基线完全一致。\n"
382
  return result_md
383
 
384
 
385
  # ========================================
386
+ # Interface
387
  # ========================================
388
 
389
  CSS = """
390
+ body { background-color: #f0f4f8 !important; }
391
+ .gradio-container { max-width: 1200px !important; margin: auto !important; font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "PingFang SC", "Microsoft YaHei", sans-serif !important; }
392
+ .tab-nav { border-bottom: 2px solid #e1e8f0 !important; margin-bottom: 20px !important; }
393
+ .tab-nav button { font-size: 15px !important; padding: 14px 22px !important; font-weight: 500 !important; color: #64748b !important; border-radius: 8px 8px 0 0 !important; background: transparent !important; border: none !important; }
394
+ .tab-nav button.selected { font-weight: 700 !important; color: #2563eb !important; border-bottom: 3px solid #2563eb !important; }
395
+ .tabitem { background: #fff !important; border-radius: 12px !important; box-shadow: 0 4px 20px rgba(0,0,0,0.04) !important; padding: 30px !important; border: 1px solid #e2e8f0 !important; }
396
+ .prose h1 { font-size: 2rem !important; color: #0f172a !important; font-weight: 800 !important; text-align: center !important; }
397
+ .prose h2 { font-size: 1.35rem !important; color: #1e293b !important; margin-top: 1.5em !important; padding-bottom: 0.4em !important; border-bottom: 2px solid #f1f5f9 !important; font-weight: 700 !important; }
398
+ .prose h3 { font-size: 1.1rem !important; color: #334155 !important; font-weight: 600 !important; }
399
+ .prose table { width: 100% !important; border-collapse: separate !important; border-spacing: 0 !important; margin: 1.2em 0 !important; border-radius: 10px !important; overflow: hidden !important; box-shadow: 0 0 0 1px #e2e8f0, 0 4px 6px -1px rgba(0,0,0,0.05) !important; font-size: 0.9rem !important; }
400
+ .prose th { background: #f8fafc !important; color: #475569 !important; font-weight: 600 !important; padding: 10px 14px !important; border-bottom: 2px solid #e2e8f0 !important; }
401
+ .prose tr:nth-child(even) td { background: #f8fafc !important; }
402
+ .prose td { padding: 9px 14px !important; color: #334155 !important; border-bottom: 1px solid #e2e8f0 !important; }
403
+ .prose blockquote { border-left: 4px solid #3b82f6 !important; background: linear-gradient(to right,#eff6ff,#fff) !important; padding: 14px 18px !important; border-radius: 0 8px 8px 0 !important; color: #1e40af !important; }
404
+ button.primary { background: linear-gradient(135deg,#3b82f6 0%,#2563eb 100%) !important; border: none !important; box-shadow: 0 4px 12px rgba(37,99,235,0.25) !important; font-weight: 600 !important; }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
405
  footer { display: none !important; }
406
  """
407
 
408
+ with gr.Blocks(title="教育大模型隐私攻防", theme=gr.themes.Soft(primary_hue="blue", secondary_hue="sky", neutral_hue="slate"), css=CSS) as demo:
 
 
 
 
 
 
409
 
410
+ gr.Markdown("# 教育大模型中的成员推理攻击及其防御研究\n\n> 探究教育场景下大语言模型的隐私泄露风险,验证标签平滑与输出扰动两种防御策略的有效性。\n")
411
 
412
+ with gr.Tab("项目概览"):
 
 
413
  gr.Markdown(
414
+ "## 研究背景\n\n大语言模型在教育领域广泛应用,训练过程不可避免接触学生敏感数据。**成员推理攻击 (MIA)** 能判断数据是否参与训练,构成隐私威胁。\n\n---\n\n"
415
+ "## 实验设计\n\n"
416
+ "| 阶段 | 内容 | 方法 |\n|------|------|------|\n"
417
+ "| 1. 数据准备 | 2000条小学数学辅导对话 | 模板化生成,含隐私字段 |\n"
418
+ "| 2. 基线模型训练 | Qwen2.5-Math-1.5B + LoRA | 标准微调,无防御 |\n"
419
+ "| 3. 标签平滑模型训练 | 两组平滑系数 | e=0.02 与 e=0.2 分别训练 |\n"
420
+ "| 4. MIA攻击测试 | 全部模型及策略 | 三模型Loss攻击 + 三组输出扰动 |\n"
421
+ "| 5. 效用评估 | 300道数学测试 | 三模型 + 三组扰动分别测试 |\n"
422
+ "| 6. 综合分析 | 隐私-效用权衡 | 散点图 + 定量对比 |\n\n---\n\n"
423
+ "## 实验配置\n\n| 项目 | 值 |\n|------|-----|\n"
424
+ "| 基座模型 | " + model_name_str + " |\n"
425
+ "| 微调 | LoRA (r=8, alpha=16) |\n| 训练轮数 | 10 epochs |\n"
426
+ "| 数据量 | " + data_size_str + " |\n| 模型数 | 3 |\n")
427
+
428
+ with gr.Tab("数据展示"):
429
+ gr.Markdown("## 数据集概况\n\n"
430
+ "- **成员数据** (1000): 用于模型训练,模型会\"记住\"这些数据\n"
431
+ "- **非成员数据** (1000): 不参与训练,作为攻击对照组\n"
432
+ "- 两组数据**格式完全相同**(都含隐私字段),这是MIA实验的标准设置——攻击者无法从数据格式区分成员与非成员\n\n"
433
+ "### 任务类型分布\n\n"
434
+ "| 类型 | 数量 | 占比 |\n|------|------|------|\n"
435
+ "| 基础计算 | 800 | 40% |\n| 应用题 | 600 | 30% |\n| 概念问答 | 400 | 20% |\n| 错题订正 | 200 | 10% |\n")
436
  with gr.Row():
437
  with gr.Column():
438
+ data_sel = gr.Radio(["成员数据(训练集)","非成员数据(测试集)"], value="成员数据(训练集)", label="选择数据池")
439
+ sample_btn = gr.Button("随机提取", variant="primary")
440
  sample_info = gr.Markdown()
441
  with gr.Column():
442
+ sample_q = gr.Textbox(label="学生提问 (Prompt)", lines=5, interactive=False)
443
+ sample_a = gr.Textbox(label="模型回答 (Ground Truth)", lines=5, interactive=False)
444
  sample_btn.click(show_random_sample, [data_sel], [sample_info, sample_q, sample_a])
445
 
446
+ with gr.Tab("MIA攻击演示"):
447
+ gr.Markdown("## 发起成员推理攻击\n\n选择攻击目标数据源,系统将计算Loss判定。\n")
448
  with gr.Row():
449
  with gr.Column():
450
  atk_model = gr.Radio(["基线模型 (Baseline)","标签平滑模型 (e=0.02)","标签平滑模型 (e=0.2)",
451
+ "输出扰动 (s=0.01)","输出扰动 (s=0.015)","输出扰动 (s=0.02)"], value="基线模型 (Baseline)", label="选择攻击目标")
452
+ atk_type = gr.Radio(["成员数据(训练集)","非成员数据(测试集)"], value="成员数据(训练集)", label="数据来源")
453
+ atk_idx = gr.Slider(0, 999, step=1, value=0, label="样本ID (0-999)")
454
+ atk_btn = gr.Button("执行成员推理攻击", variant="primary", size="lg")
455
  atk_question = gr.Markdown()
456
  with gr.Column():
457
+ gr.Markdown("**攻击侦测控制台**")
458
+ atk_gauge = gr.Plot(label="Loss分布雷达")
459
  atk_result = gr.Markdown()
460
  atk_btn.click(run_mia_demo, [atk_idx, atk_type, atk_model], [atk_question, atk_gauge, atk_result])
461
 
462
+ with gr.Tab("防御对比"):
463
+ gr.Markdown("## 防御策略效果对比\n\n"
464
+ "| 策略 | 类型 | 原理 | 实验优势 | 实验局限 |\n|------|------|------|---------|--------|\n"
465
+ "| 标签平滑 | 训练期 | 软化标签抑制过度记忆 | AUC降至" + f"{s002_auc:.4f}" + "(e=0.02) | 需重新训练 |\n"
466
+ "| 输出扰动 | 推理期 | Loss加高斯噪声 | AUC降至" + f"{op002_auc:.4f}" + "(s=0.02),零效用损失 | 仅遮蔽统计信号 |\n")
467
+ gr.Markdown("### AUC对比"); gr.Plot(value=make_auc_bar())
468
+ gr.Markdown("### Loss分布 - 三个模型"); gr.Plot(value=make_loss_distribution())
469
+ gr.Markdown("### Loss分布 - 输出扰动效果"); gr.Plot(value=make_perturb_loss_distribution())
470
+ tbl = "### 完整结果\n\n| 策略 | 类型 | AUC | 准确率 | AUC变化 |\n|------|------|-----|--------|--------|\n"
471
+ for k, n, cat in [('baseline','基线','--'),('smooth_0.02','LS(e=0.02)','训练期'),('smooth_0.2','LS(e=0.2)','训练期')]:
472
  if k in mia_results:
473
  a=mia_results[k]['auc']; acc=utility_results.get(k,{}).get('accuracy',0)*100
474
+ d = "--" if k=='baseline' else f"{a-bl_auc:+.4f}"
475
+ tbl += "| "+n+" | "+cat+" | "+f"{a:.4f}"+" | "+f"{acc:.1f}"+"%"+" | "+d+" |\n"
476
  for k, n in [('perturbation_0.01','OP(s=0.01)'),('perturbation_0.015','OP(s=0.015)'),('perturbation_0.02','OP(s=0.02)')]:
477
  if k in perturb_results:
478
  a=perturb_results[k]['auc']
479
+ tbl += "| "+n+" | 推理期 | "+f"{a:.4f}"+" | "+f"{bl_acc:.1f}"+"% (不变) | "+f"{a-bl_auc:+.4f}"+" |\n"
480
  gr.Markdown(tbl)
481
 
482
+ with gr.Tab("防御详解"):
483
  gr.Markdown(
484
+ "## 一、标签平滑 (Label Smoothing)\n\n**类型**: 训练期防御\n\n"
485
+ "将训练标签从硬标签转换为软标签,降低过拟合。\n\n"
486
+ "**公式**: y_smooth = (1 - e) * y_onehot + e / V\n\n"
487
+ "其中 e 为平滑系数,V 为词汇表大小。\n\n"
488
+ "| 参数 | AUC | 准确率 | 分析 |\n|------|-----|--------|------|\n"
489
+ "| 基线 (e=0) | " + f"{bl_auc:.4f}" + " | " + f"{bl_acc:.1f}" + "% | 无防御 |\n"
490
+ "| e=0.02 | " + f"{s002_auc:.4f}" + " | " + f"{s002_acc:.1f}" + "% | 温和平滑 |\n"
491
+ "| e=0.2 | " + f"{s02_auc:.4f}" + " | " + f"{s02_acc:.1f}" + "% | 强力平滑 |\n\n---\n\n"
492
+ "## 二、输出扰动 (Output Perturbation)\n\n**类型**: 推理期防御\n\n"
493
+ "在推理阶段对Loss注入高斯噪声。\n\n"
494
+ "**公式**: L_perturbed = L_original + N(0, s^2)\n\n"
495
+ "| 参数 | AUC | AUC降幅 | 准确率 |\n|------|-----|---------|--------|\n"
496
+ "| 基线 | " + f"{bl_auc:.4f}" + " | -- | " + f"{bl_acc:.1f}" + "% |\n"
497
+ "| s=0.01 | " + f"{op001_auc:.4f}" + " | " + f"{bl_auc-op001_auc:.4f}" + " | " + f"{bl_acc:.1f}" + "% |\n"
498
+ "| s=0.015 | " + f"{op0015_auc:.4f}" + " | " + f"{bl_auc-op0015_auc:.4f}" + " | " + f"{bl_acc:.1f}" + "% |\n"
499
+ "| s=0.02 | " + f"{op002_auc:.4f}" + " | " + f"{bl_auc-op002_auc:.4f}" + " | " + f"{bl_acc:.1f}" + "% |\n\n---\n\n"
500
+ "## 三、综合对比\n\n| 维度 | 标签平滑 | 输出扰动 |\n|------|---------|----------|\n"
501
+ "| 作用阶段 | 训练期 | 推理期 |\n| 需要重训 | 是 | 否 |\n| 效用影响 | 取决于系数 | 无 |\n| 防御原理 | 降低记忆 | 遮蔽信号 |\n| 部署难度 | 训练介入 | 即插即用 |\n")
502
+
503
+ with gr.Tab("效用评估"):
504
+ gr.Markdown("## 效用评估\n\n> 从300道测试题中随机抽取,展示模型的实际作答情况。\n")
505
  with gr.Row():
506
  with gr.Column():
507
+ gr.Markdown("### 准确率对比"); gr.Plot(value=make_accuracy_bar())
508
  with gr.Column():
509
+ gr.Markdown("### 隐私-效用权衡"); gr.Plot(value=make_tradeoff())
510
+ gr.Markdown("### 在线效用测试")
511
  with gr.Row():
512
  with gr.Column():
513
  eval_model = gr.Radio(["基线模型 (Baseline)","标签平滑模型 (e=0.02)","标签平滑模型 (e=0.2)",
514
+ "输出扰动 (s=0.01)","输出扰动 (s=0.015)","输出扰动 (s=0.02)"], value="基线模型 (Baseline)", label="选择模型/策略")
515
+ eval_btn = gr.Button("随机抽题测试", variant="primary")
516
  with gr.Column():
517
  eval_result = gr.Markdown()
518
  eval_btn.click(run_eval_demo, [eval_model], [eval_result])
519
 
520
+ with gr.Tab("实验结果可视化"):
521
+ gr.Markdown("## 实验核心图表")
522
+ for fn, cap in [("fig1_loss_distribution_comparison.png","1: 成员与非成员Loss分布对比"),
523
+ ("fig2_privacy_utility_tradeoff_fixed.png","2: 隐私风险与模型效用权衡"),
524
+ ("fig3_defense_comparison_bar.png","3: 各防御策略AUC对比")]:
525
  p = os.path.join(BASE_DIR,"figures",fn)
526
  if os.path.exists(p):
527
  gr.Markdown("### "+cap); gr.Image(value=p, show_label=False, height=450); gr.Markdown("---")
528
 
529
+ with gr.Tab("研究结论"):
530
  gr.Markdown(
531
+ "## 研究结论\n\n---\n\n"
532
+ "### 一、教育大模型面临显著的MIA风险\n\n"
533
+ "基线模型 AUC = **" + f"{bl_auc:.4f}" + "**,成员平均Loss (" + f"{bl_m_mean:.4f}" + ") 低于非成员 (" + f"{bl_nm_mean:.4f}" + "),模型对训练数据存在可被利用的记忆效应。\n\n---\n\n"
534
+ "### 二、标签平滑的有效性与局限性\n\n"
535
+ "| 参数 | AUC | 准确率 | 分析 |\n|------|-----|--------|------|\n"
536
+ "| 基线 (e=0) | " + f"{bl_auc:.4f}" + " | " + f"{bl_acc:.1f}" + "% | 无防御 |\n"
537
+ "| e=0.02 | " + f"{s002_auc:.4f}" + " | " + f"{s002_acc:.1f}" + "% | 正则化提升泛化 |\n"
538
+ "| e=0.2 | " + f"{s02_auc:.4f}" + " | " + f"{s02_acc:.1f}" + "% | 防御更强 |\n\n"
539
+ "e=0.02在隐私保护与效用保持间取得较好平衡。\n\n---\n\n"
540
+ "### 三、输出扰动的独特优势\n\n"
541
+ "| 参数 | AUC | AUC降幅 | 准确率 |\n|------|-----|---------|--------|\n"
542
+ "| s=0.01 | " + f"{op001_auc:.4f}" + " | " + f"{bl_auc-op001_auc:.4f}" + " | " + f"{bl_acc:.1f}" + "% |\n"
543
+ "| s=0.015 | " + f"{op0015_auc:.4f}" + " | " + f"{bl_auc-op0015_auc:.4f}" + " | " + f"{bl_acc:.1f}" + "% |\n"
544
+ "| s=0.02 | " + f"{op002_auc:.4f}" + " | " + f"{bl_auc-op002_auc:.4f}" + " | " + f"{bl_acc:.1f}" + "% |\n\n"
545
+ "零效用损失,适合已部署系统加固。\n\n---\n\n"
546
+ "### 四、隐私-效用权衡\n\n"
547
+ "| 策略 | AUC | 准确率 | AUC变化 | 效用变化 |\n|------|-----|--------|--------|--------|\n"
548
+ "| 基线 | " + f"{bl_auc:.4f}" + " | " + f"{bl_acc:.1f}" + "% | -- | -- |\n"
549
+ "| LS e=0.02 | " + f"{s002_auc:.4f}" + " | " + f"{s002_acc:.1f}" + "% | " + f"{s002_auc-bl_auc:+.4f}" + " | " + f"{s002_acc-bl_acc:+.1f}" + "pp |\n"
550
+ "| LS e=0.2 | " + f"{s02_auc:.4f}" + " | " + f"{s02_acc:.1f}" + "% | " + f"{s02_auc-bl_auc:+.4f}" + " | " + f"{s02_acc-bl_acc:+.1f}" + "pp |\n"
551
+ "| OP s=0.01 | " + f"{op001_auc:.4f}" + " | " + f"{bl_acc:.1f}" + "% | " + f"{op001_auc-bl_auc:+.4f}" + " | 0 |\n"
552
+ "| OP s=0.015 | " + f"{op0015_auc:.4f}" + " | " + f"{bl_acc:.1f}" + "% | " + f"{op0015_auc-bl_auc:+.4f}" + " | 0 |\n"
553
+ "| OP s=0.02 | " + f"{op002_auc:.4f}" + " | " + f"{bl_acc:.1f}" + "% | " + f"{op002_auc-bl_auc:+.4f}" + " | 0 |\n\n"
554
+ "两类策略机制互补,可根据场景灵活选择或组合。\n")
555
+
556
+ gr.Markdown("---\n\n<center>教育大模型中的成员推理攻击及其防御思路研究</center>\n")
557
 
558
  demo.launch()