xiaohy commited on
Commit
eb028b0
·
verified ·
1 Parent(s): cba8a0b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +245 -309
app.py CHANGED
@@ -5,12 +5,8 @@ import numpy as np
5
  import matplotlib
6
  matplotlib.use('Agg')
7
  import matplotlib.pyplot as plt
8
- from matplotlib.patches import FancyBboxPatch
9
  import gradio as gr
10
 
11
- # ========================================
12
- # 1. Load Data
13
- # ========================================
14
  BASE_DIR = os.path.dirname(os.path.abspath(__file__))
15
 
16
 
@@ -39,7 +35,6 @@ config = load_json("config.json")
39
  plt.rcParams['font.sans-serif'] = ['DejaVu Sans']
40
  plt.rcParams['axes.unicode_minus'] = False
41
 
42
- # Pre-fetch values
43
  bl_auc = mia_results.get('baseline', {}).get('auc', 0)
44
  s002_auc = mia_results.get('smooth_0.02', {}).get('auc', 0)
45
  s02_auc = mia_results.get('smooth_0.2', {}).get('auc', 0)
@@ -53,12 +48,29 @@ bl_m_mean = mia_results.get('baseline', {}).get('member_loss_mean', 0.19)
53
  bl_nm_mean = mia_results.get('baseline', {}).get('non_member_loss_mean', 0.23)
54
  bl_m_std = mia_results.get('baseline', {}).get('member_loss_std', 0.03)
55
  bl_nm_std = mia_results.get('baseline', {}).get('non_member_loss_std', 0.03)
 
 
 
 
 
 
 
 
 
 
 
56
  model_name_str = config.get('model_name', 'Qwen/Qwen2.5-Math-1.5B-Instruct')
57
  data_size_str = str(config.get('data_size', 2000))
58
 
 
 
 
 
 
 
59
 
60
  # ========================================
61
- # 2. Chart Functions
62
  # ========================================
63
 
64
  def make_pie_chart():
@@ -66,75 +78,76 @@ def make_pie_chart():
66
  for item in member_data + non_member_data:
67
  t = item.get('task_type', 'unknown')
68
  tc[t] = tc.get(t, 0) + 1
69
- nm = {'calculation': 'Calculation', 'word_problem': 'Word Problem',
70
- 'concept': 'Concept Q&A', 'error_correction': 'Error Correction'}
71
  labels = [nm.get(k, k) for k in tc]
72
  sizes = list(tc.values())
73
  colors = ['#5B8FF9', '#5AD8A6', '#F6BD16', '#E86452']
74
- fig, ax = plt.subplots(figsize=(6, 5))
75
  wedges, texts, autotexts = ax.pie(
76
  sizes, labels=labels, autopct='%1.1f%%', colors=colors[:len(labels)],
77
- startangle=90, textprops={'fontsize': 10},
78
  wedgeprops={'edgecolor': 'white', 'linewidth': 2})
79
  for t in autotexts:
80
  t.set_fontsize(10)
81
  t.set_fontweight('bold')
82
- ax.set_title('Task Type Distribution', fontsize=13, fontweight='bold', pad=10)
83
  plt.tight_layout()
84
  return fig
85
 
86
 
87
  def make_loss_distribution():
88
  items = []
89
- for k, t in [('baseline', 'Baseline'), ('smooth_0.02', 'LS (e=0.02)'), ('smooth_0.2', 'LS (e=0.2)')]:
90
  if k in full_results:
91
  auc = mia_results.get(k, {}).get('auc', 0)
92
- items.append((k, t + " | AUC=" + f"{auc:.4f}"))
93
  n = len(items)
94
  if n == 0:
95
  fig, ax = plt.subplots()
96
  ax.text(0.5, 0.5, 'No data', ha='center')
97
  return fig
98
- fig, axes = plt.subplots(1, n, figsize=(5 * n, 4))
99
  if n == 1:
100
  axes = [axes]
101
  for ax, (k, title) in zip(axes, items):
102
  m = full_results[k]['member_losses']
103
- nm = full_results[k]['non_member_losses']
104
- bins = np.linspace(min(min(m), min(nm)), max(max(m), max(nm)), 35)
105
- ax.hist(m, bins=bins, alpha=0.6, color='#5B8FF9', label='Member', density=True)
106
- ax.hist(nm, bins=bins, alpha=0.6, color='#E86452', label='Non-Member', density=True)
107
- ax.set_title(title, fontsize=11, fontweight='bold')
108
- ax.set_xlabel('Loss', fontsize=9)
109
- ax.set_ylabel('Density', fontsize=9)
110
- ax.legend(fontsize=8)
 
 
 
111
  ax.grid(True, linestyle='--', alpha=0.3)
112
  ax.spines['top'].set_visible(False)
113
  ax.spines['right'].set_visible(False)
114
- plt.tight_layout()
115
  return fig
116
 
117
 
118
  def make_auc_bar():
119
  methods, aucs, colors = [], [], []
120
- for k, name, c in [('baseline', 'Baseline', '#8C8C8C'), ('smooth_0.02', 'LS (e=0.02)', '#5B8FF9'),
121
- ('smooth_0.2', 'LS (e=0.2)', '#3D76DD')]:
122
  if k in mia_results:
123
  methods.append(name); aucs.append(mia_results[k]['auc']); colors.append(c)
124
- for k, name, c in [('perturbation_0.01', 'OP (s=0.01)', '#5AD8A6'),
125
- ('perturbation_0.015', 'OP (s=0.015)', '#2EAD78'),
126
- ('perturbation_0.02', 'OP (s=0.02)', '#1A7F5A')]:
127
  if k in perturb_results:
128
  methods.append(name); aucs.append(perturb_results[k]['auc']); colors.append(c)
129
  fig, ax = plt.subplots(figsize=(9, 5))
130
  bars = ax.bar(methods, aucs, color=colors, width=0.5, edgecolor='white', linewidth=1.5)
131
  for bar, a in zip(bars, aucs):
132
- ax.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.003,
133
- f'{a:.4f}', ha='center', va='bottom', fontsize=10, fontweight='bold')
134
  ax.axhline(y=0.5, color='#E86452', linestyle='--', linewidth=1.5, alpha=0.6, label='Random Guess (0.5)')
135
  ax.set_ylabel('MIA AUC', fontsize=11)
136
- ax.set_title('Defense Mechanisms - AUC', fontsize=13, fontweight='bold')
137
- ax.set_ylim(0.48, max(aucs) + 0.04 if aucs else 0.7)
138
  ax.legend(fontsize=9)
139
  ax.grid(axis='y', linestyle='--', alpha=0.3)
140
  ax.spines['top'].set_visible(False)
@@ -147,15 +160,16 @@ def make_auc_bar():
147
  def make_tradeoff():
148
  fig, ax = plt.subplots(figsize=(8, 6))
149
  pts = []
150
- for k, name, mk, c, sz in [('baseline', 'Baseline', 'o', '#8C8C8C', 180),
151
- ('smooth_0.02', 'LS (e=0.02)', 's', '#5B8FF9', 160),
152
- ('smooth_0.2', 'LS (e=0.2)', 's', '#3D76DD', 160)]:
153
  if k in mia_results and k in utility_results:
154
  pts.append({'n': name, 'a': mia_results[k]['auc'], 'c': utility_results[k]['accuracy'],
155
  'm': mk, 'co': c, 's': sz})
156
  ba = utility_results.get('baseline', {}).get('accuracy', 0.633)
157
- for k, name, mk, c, sz in [('perturbation_0.01', 'OP (s=0.01)', '^', '#5AD8A6', 170),
158
- ('perturbation_0.02', 'OP (s=0.02)', '^', '#1A7F5A', 170)]:
 
159
  if k in perturb_results:
160
  pts.append({'n': name, 'a': perturb_results[k]['auc'], 'c': ba, 'm': mk, 'co': c, 's': sz})
161
  for p in pts:
@@ -163,13 +177,13 @@ def make_tradeoff():
163
  s=p['s'], edgecolors='white', linewidth=2, zorder=5)
164
  ax.axhline(y=0.5, color='#BFBFBF', linestyle='--', alpha=0.8, label='Random Guess')
165
  ax.set_xlabel('Accuracy', fontsize=11, fontweight='bold')
166
- ax.set_ylabel('MIA AUC', fontsize=11, fontweight='bold')
167
  ax.set_title('Privacy-Utility Trade-off', fontsize=13, fontweight='bold')
168
  aa = [p['c'] for p in pts]; ab = [p['a'] for p in pts]
169
  if aa and ab:
170
  ax.set_xlim(min(aa)-0.03, max(aa)+0.05)
171
  ax.set_ylim(min(min(ab), 0.5)-0.02, max(ab)+0.025)
172
- ax.legend(loc='upper right', fontsize=9, fancybox=True)
173
  ax.grid(True, alpha=0.2)
174
  ax.spines['top'].set_visible(False)
175
  ax.spines['right'].set_visible(False)
@@ -179,22 +193,22 @@ def make_tradeoff():
179
 
180
  def make_accuracy_bar():
181
  names, accs, colors = [], [], []
182
- for k, name, c in [('baseline', 'Baseline', '#8C8C8C'), ('smooth_0.02', 'LS (e=0.02)', '#5B8FF9'),
183
- ('smooth_0.2', 'LS (e=0.2)', '#3D76DD')]:
184
  if k in utility_results:
185
  names.append(name); accs.append(utility_results[k]['accuracy']*100); colors.append(c)
186
  bp = utility_results.get('baseline', {}).get('accuracy', 0)*100
187
- for k, name, c in [('perturbation_0.01', 'OP (s=0.01)', '#5AD8A6'),
188
- ('perturbation_0.02', 'OP (s=0.02)', '#1A7F5A')]:
 
189
  if k in perturb_results:
190
  names.append(name); accs.append(bp); colors.append(c)
191
  fig, ax = plt.subplots(figsize=(9, 5))
192
  bars = ax.bar(names, accs, color=colors, width=0.5, edgecolor='white', linewidth=1.5)
193
  for bar, acc in zip(bars, accs):
194
- ax.text(bar.get_x()+bar.get_width()/2, bar.get_height()+0.6,
195
- f'{acc:.1f}%', ha='center', va='bottom', fontsize=10, fontweight='bold')
196
  ax.set_ylabel('Accuracy (%)', fontsize=11)
197
- ax.set_title('Model Utility (300 Math Questions)', fontsize=13, fontweight='bold')
198
  ax.set_ylim(0, 100)
199
  ax.grid(axis='y', alpha=0.3)
200
  ax.spines['top'].set_visible(False)
@@ -204,24 +218,21 @@ def make_accuracy_bar():
204
  return fig
205
 
206
 
207
- def make_loss_gauge(loss_val, m_mean, nm_mean, threshold):
208
  fig, ax = plt.subplots(figsize=(8, 2.8))
209
- x_min = min(m_mean - 3*bl_m_std, loss_val - 0.01)
210
- x_max = max(nm_mean + 3*bl_nm_std, loss_val + 0.01)
211
-
212
  ax.axvspan(x_min, threshold, alpha=0.12, color='#5B8FF9')
213
  ax.axvspan(threshold, x_max, alpha=0.12, color='#E86452')
214
  ax.axvline(x=threshold, color='#434343', linewidth=2, linestyle='-', zorder=3)
215
  ax.text(threshold, 1.12, 'Threshold', ha='center', va='bottom', fontsize=9,
216
  fontweight='bold', color='#434343', transform=ax.get_xaxis_transform())
217
-
218
  ax.axvline(x=m_mean, color='#5B8FF9', linewidth=1.2, linestyle='--', alpha=0.6)
219
- ax.text(m_mean, -0.28, f'Member Mean\n({m_mean:.4f})', ha='center', va='top',
220
  fontsize=7.5, color='#5B8FF9', transform=ax.get_xaxis_transform())
221
  ax.axvline(x=nm_mean, color='#E86452', linewidth=1.2, linestyle='--', alpha=0.6)
222
- ax.text(nm_mean, -0.28, f'Non-Member Mean\n({nm_mean:.4f})', ha='center', va='top',
223
  fontsize=7.5, color='#E86452', transform=ax.get_xaxis_transform())
224
-
225
  in_member = loss_val < threshold
226
  mc = '#5B8FF9' if in_member else '#E86452'
227
  ax.plot(loss_val, 0.5, marker='v', markersize=16, color=mc, zorder=5,
@@ -229,26 +240,23 @@ def make_loss_gauge(loss_val, m_mean, nm_mean, threshold):
229
  ax.text(loss_val, 0.78, f'Loss={loss_val:.4f}', ha='center', va='bottom', fontsize=10,
230
  fontweight='bold', color=mc, transform=ax.get_xaxis_transform(),
231
  bbox=dict(boxstyle='round,pad=0.3', facecolor='white', edgecolor=mc, alpha=0.95))
232
-
233
  mc_x = (x_min + threshold) / 2
234
  nmc_x = (threshold + x_max) / 2
235
  ax.text(mc_x, 0.5, 'Member Zone', ha='center', va='center', fontsize=10,
236
  color='#5B8FF9', fontweight='bold', alpha=0.5, transform=ax.get_xaxis_transform())
237
  ax.text(nmc_x, 0.5, 'Non-Member Zone', ha='center', va='center', fontsize=10,
238
  color='#E86452', fontweight='bold', alpha=0.5, transform=ax.get_xaxis_transform())
239
-
240
  ax.set_xlim(x_min, x_max)
241
  ax.set_yticks([])
242
- ax.spines['top'].set_visible(False)
243
- ax.spines['right'].set_visible(False)
244
- ax.spines['left'].set_visible(False)
245
  ax.set_xlabel('Loss Value', fontsize=9)
246
  plt.tight_layout()
247
  return fig
248
 
249
 
250
  # ========================================
251
- # 3. Callbacks
252
  # ========================================
253
 
254
  def show_random_sample(data_type):
@@ -257,52 +265,54 @@ def show_random_sample(data_type):
257
  meta = sample['metadata']
258
  task_map = {'calculation': '基础计算', 'word_problem': '应用题',
259
  'concept': '概念问答', 'error_correction': '���题订正'}
260
-
261
  info_md = (
262
  "**截获的隐私元数据**\n\n"
263
  "- **姓名**: " + clean_text(str(meta.get('name', ''))) + "\n"
264
  "- **学号**: " + clean_text(str(meta.get('student_id', ''))) + "\n"
265
  "- **班级**: " + clean_text(str(meta.get('class', ''))) + "\n"
266
  "- **成绩**: " + clean_text(str(meta.get('score', ''))) + " 分\n"
267
- "- **类型**: " + task_map.get(sample.get('task_type', ''), sample.get('task_type', '')) + "\n"
268
- )
269
  return info_md, clean_text(sample.get('question', '')), clean_text(sample.get('answer', ''))
270
 
271
 
272
- def run_mia_demo(sample_index, data_type):
 
 
 
 
 
 
 
273
  is_member = (data_type == "成员数据(训练集)")
274
  data = member_data if is_member else non_member_data
275
  idx = min(int(sample_index), len(data) - 1)
276
  sample = data[idx]
277
 
278
- bl = full_results.get('baseline', {})
279
- if is_member and idx < len(bl.get('member_losses', [])):
280
- loss = bl['member_losses'][idx]
281
- elif not is_member and idx < len(bl.get('non_member_losses', [])):
282
- loss = bl['non_member_losses'][idx]
 
 
 
283
  else:
284
- loss = float(np.random.normal(bl_m_mean if is_member else bl_nm_mean, 0.02))
285
 
286
- threshold = (bl_m_mean + bl_nm_mean) / 2.0
 
 
 
 
287
  pred_member = (loss < threshold)
288
  attack_correct = (pred_member == is_member)
289
 
290
- gauge_fig = make_loss_gauge(loss, bl_m_mean, bl_nm_mean, threshold)
291
-
292
- # Build result card
293
- if pred_member:
294
- pred_label = "训练成员"
295
- pred_color = "🔴"
296
- else:
297
- pred_label = "非训练成员"
298
- pred_color = "🟢"
299
 
300
- if is_member:
301
- actual_label = "训练成员"
302
- actual_color = "🔴"
303
- else:
304
- actual_label = "非训练成员"
305
- actual_color = "🟢"
306
 
307
  if attack_correct and pred_member and is_member:
308
  verdict = "⚠️ **攻击成功: 发生了隐私泄露**"
@@ -314,155 +324,53 @@ def run_mia_demo(sample_index, data_type):
314
  verdict = "❌ **攻击失误**"
315
  verdict_detail = "攻击者的判定与真实身份不符。"
316
 
 
317
  result_md = (
318
- verdict + "\n\n"
319
- + verdict_detail + "\n\n"
320
  "| | 攻击者计算得出 | 系统真实身份 |\n"
321
  "|---|---|---|\n"
322
  "| 判定 | " + pred_color + " " + pred_label + " | " + actual_color + " " + actual_label + " |\n"
323
- "| Loss | " + f"{loss:.4f}" + " | Threshold: " + f"{threshold:.4f}" + " |\n"
324
- )
325
 
326
  q_text = "**样本追踪号 [" + str(idx) + "] :**\n\n" + clean_text(sample.get('question', ''))[:500]
327
  return q_text, gauge_fig, result_md
328
 
329
 
330
  # ========================================
331
- # 4. Build Interface (CSS 精装修版)
332
  # ========================================
333
 
334
  CSS = """
335
- /* 全局背景与字体 */
336
  body { background-color: #f0f4f8 !important; }
337
  .gradio-container {
338
- max-width: 1200px !important;
339
- margin: auto !important;
340
  font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "PingFang SC", "Microsoft YaHei", sans-serif !important;
341
  }
342
-
343
- /* Tab 导航高级感 */
344
- .tab-nav {
345
- border-bottom: 2px solid #e1e8f0 !important;
346
- background-color: transparent !important;
347
- margin-bottom: 20px !important;
348
- }
349
  .tab-nav button {
350
- font-size: 15px !important;
351
- padding: 14px 24px !important;
352
- font-weight: 500 !important;
353
- color: #64748b !important;
354
- border-radius: 8px 8px 0 0 !important;
355
- transition: all 0.3s ease !important;
356
- background: transparent !important;
357
- border: none !important;
358
- }
359
- .tab-nav button:hover {
360
- color: #3b82f6 !important;
361
- background: rgba(59, 130, 246, 0.05) !important;
362
  }
363
- .tab-nav button.selected {
364
- font-weight: 700 !important;
365
- color: #2563eb !important;
366
- border-bottom: 3px solid #2563eb !important;
367
- }
368
-
369
- /* 内容区域卡片化布局 */
370
  .tabitem {
371
- background: #ffffff !important;
372
- border-radius: 12px !important;
373
- box-shadow: 0 4px 20px rgba(0, 0, 0, 0.04) !important;
374
- padding: 30px !important;
375
- border: 1px solid #e2e8f0 !important;
376
- }
377
-
378
- /* 标题排版优化 */
379
- .prose h1 {
380
- font-size: 2.2rem !important;
381
- color: #0f172a !important;
382
- letter-spacing: -0.02em !important;
383
- font-weight: 800 !important;
384
- text-align: center !important;
385
- margin-bottom: 0.5em !important;
386
- }
387
- .prose h2 {
388
- font-size: 1.4rem !important;
389
- color: #1e293b !important;
390
- margin-top: 1.5em !important;
391
- padding-bottom: 0.4em !important;
392
- border-bottom: 2px solid #f1f5f9 !important;
393
- font-weight: 700 !important;
394
- }
395
- .prose h3 {
396
- font-size: 1.15rem !important;
397
- color: #334155 !important;
398
- font-weight: 600 !important;
399
- margin-top: 1.2em !important;
400
- }
401
-
402
- /* 🚀 表格终极美化 (带圆角、阴影和斑马纹) */
403
- .prose table {
404
- width: 100% !important;
405
- border-collapse: separate !important;
406
- border-spacing: 0 !important;
407
- margin: 1.5em 0 !important;
408
- border-radius: 10px !important;
409
- overflow: hidden !important;
410
- box-shadow: 0 0 0 1px #e2e8f0, 0 4px 6px -1px rgba(0, 0, 0, 0.05) !important;
411
- font-size: 0.95rem !important;
412
- }
413
- .prose th {
414
- background: #f8fafc !important;
415
- color: #475569 !important;
416
- font-weight: 600 !important;
417
- text-transform: uppercase !important;
418
- font-size: 0.85rem !important;
419
- letter-spacing: 0.05em !important;
420
- padding: 14px 16px !important;
421
- border-bottom: 2px solid #e2e8f0 !important;
422
- text-align: left !important;
423
- }
424
- .prose tr:nth-child(even) td {
425
- background: #f8fafc !important;
426
- }
427
- .prose td {
428
- padding: 12px 16px !important;
429
- color: #334155 !important;
430
- border-bottom: 1px solid #e2e8f0 !important;
431
- transition: background-color 0.2s ease !important;
432
- }
433
- .prose tr:last-child td {
434
- border-bottom: none !important;
435
- }
436
- .prose tr:hover td {
437
- background-color: #f1f5f9 !important;
438
- }
439
-
440
- /* 引用块美化 (高亮重点说明) */
441
- .prose blockquote {
442
- border-left: 4px solid #3b82f6 !important;
443
- background: linear-gradient(to right, #eff6ff, #ffffff) !important;
444
- padding: 16px 20px !important;
445
- border-radius: 0 8px 8px 0 !important;
446
- font-size: 0.95rem !important;
447
- color: #1e40af !important;
448
- margin: 1.5em 0 !important;
449
- box-shadow: 0 1px 2px rgba(0,0,0,0.02) !important;
450
- }
451
-
452
- /* 按钮高级感特效 */
453
- button.primary {
454
- background: linear-gradient(135deg, #3b82f6 0%, #2563eb 100%) !important;
455
- border: none !important;
456
- box-shadow: 0 4px 12px rgba(37, 99, 235, 0.25) !important;
457
- font-weight: 600 !important;
458
- transition: all 0.3s ease !important;
459
- }
460
- button.primary:hover {
461
- transform: translateY(-2px) !important;
462
- box-shadow: 0 6px 16px rgba(37, 99, 235, 0.35) !important;
463
  }
464
-
465
- /* 隐藏无关页脚标志 */
 
 
 
 
 
 
 
 
 
 
466
  footer { display: none !important; }
467
  """
468
 
@@ -472,201 +380,229 @@ with gr.Blocks(title="教育大模型隐私攻防", theme=gr.themes.Soft(
472
  gr.Markdown(
473
  "# 教育大模型中的成员推理攻击及其防御研究\n\n"
474
  "> 探究教育场景下大语言模型的隐私泄露风险,"
475
- "验证标签平滑与输出扰动两种防御策略的有效性。\n")
476
 
477
- # --- Tab 1 ---
478
  with gr.Tab("项目概览"):
479
  gr.Markdown(
480
  "## 研究背景\n\n"
481
- "大语言模型在教育领域的应用日益广泛,模型训练过程中不可避免地接触到学生敏感数据。"
482
- "**成员推理攻击 (MIA)** 能够判断某条数据是否参与了模型训练,构成隐私威胁。\n\n"
 
483
  "---\n\n"
484
  "## 实验设计\n\n"
485
  "| 阶段 | 内容 | 方法 |\n"
486
  "|------|------|------|\n"
487
- "| 数据准备 | 2000条数学辅导对话 | 模板化生成,含隐私字段 |\n"
488
- "| 模型训练 | Qwen2.5-Math + LoRA | 基线 + 签平滑 (e=0.02, 0.2) |\n"
489
- "| MIA攻击 | Loss-based攻击 | 计算全样本Loss,AUC评估 |\n"
490
- "| 输出扰动 | 推理期防御 | Loss加高斯噪声 (s=0.01~0.02) |\n"
491
- "| 效用评估 | 300道数学测试 | 准确率评估 |\n"
492
- "| 综合分析 | 隐私-效用权衡 | 散点图 + 定量对比 |\n\n"
 
493
  "---\n\n"
494
  "## 实验配置\n\n"
495
- "| 项 | 值 |\n"
496
- "|---|---|\n"
497
  "| 基座模型 | " + model_name_str + " |\n"
498
- "| 微调 | LoRA (r=8, alpha=16) |\n"
499
  "| 训练轮数 | 10 epochs |\n"
500
- "| 数据量 | " + data_size_str + " 条 |\n")
 
501
 
502
- # --- Tab 2 ---
503
  with gr.Tab("数据展示"):
504
  gr.Markdown("## 数据集概况\n\n"
505
- "成员1000条(训练集)+ 非成员1000条(对照组),每条含学生隐私字段。\n")
506
  with gr.Row():
507
  with gr.Column(scale=1):
508
- gr.Plot(value=make_pie_chart(), label="图表")
509
  with gr.Column(scale=1):
510
  gr.Markdown("**选择靶向数据池**")
511
  data_sel = gr.Radio(["成员数据(训练集)", "非成员数据(测试集)"],
512
  value="成员数据(训练集)", label="")
513
  sample_btn = gr.Button("随机��取", variant="primary")
514
  sample_info = gr.Markdown()
515
-
516
  gr.Markdown("---\n\n**原始对话内容**")
517
  with gr.Row():
518
  sample_q = gr.Textbox(label="学生提问 (Prompt)", lines=5, interactive=False)
519
  sample_a = gr.Textbox(label="模型回答 (Ground Truth)", lines=5, interactive=False)
520
-
521
  sample_btn.click(show_random_sample, [data_sel], [sample_info, sample_q, sample_a])
522
 
523
- # --- Tab 3 ---
524
  with gr.Tab("MIA攻击演示"):
525
  gr.Markdown(
526
  "## 发起成员推理攻击\n\n"
527
- "调整下方滑块选择一条数据,系统将计算该条数据 Loss 值并实施判定。\n")
528
-
529
  with gr.Row():
530
  with gr.Column(scale=1):
 
 
 
531
  atk_type = gr.Radio(["成员数据(训练集)", "非成员数据(测试集)"],
532
  value="成员数据(训练集)", label="模拟真实数据来源")
533
  atk_idx = gr.Slider(0, 999, step=1, value=0, label="样本游标 ID (0-999)")
534
  atk_btn = gr.Button("执行成员推理攻击", variant="primary", size="lg")
535
  atk_question = gr.Markdown()
536
-
537
  with gr.Column(scale=1):
538
  gr.Markdown("**攻击侦测控制台**")
539
  atk_gauge = gr.Plot(label="Loss 分布雷达")
540
  atk_result = gr.Markdown()
 
541
 
542
- atk_btn.click(run_mia_demo, [atk_idx, atk_type], [atk_question, atk_gauge, atk_result])
543
-
544
- # --- Tab 4 ---
545
  with gr.Tab("防御对比"):
546
  gr.Markdown(
547
  "## 防御策略效果对比\n\n"
548
- "| 策略 | 类型 | 原理 | 优势 | 局限 |\n"
549
- "|------|------|------|------|------|\n"
550
- "| 标签平滑 | 训练期 | 软化训练标签 | 降低过拟合 | 可能影响效用 |\n"
551
- "| 输出扰动 | 推理期 | Loss加噪声 | 零效用损失 | 仅遮蔽信号 |\n")
 
552
 
553
  with gr.Row():
554
  with gr.Column():
555
- gr.Plot(value=make_auc_bar(), label="AUC对比")
 
556
  with gr.Column():
557
- gr.Plot(value=make_loss_distribution(), label="Loss分布")
 
558
 
559
  tbl = (
560
- "### 结果汇总\n\n"
561
- "| 策略 | 类型 | AUC | 准确率 |\n"
562
- "|------|------|-----|--------|\n")
563
- for k, name, cat in [('baseline', '基线', '--'), ('smooth_0.02', '标签平滑 (e=0.02)', '训练期'),
564
  ('smooth_0.2', '标签平滑 (e=0.2)', '训练期')]:
565
  if k in mia_results:
566
  a = mia_results[k]['auc']
567
  acc = utility_results.get(k, {}).get('accuracy', 0) * 100
568
- tbl += "| " + name + " | " + cat + " | " + f"{a:.4f}" + " | " + f"{acc:.1f}" + "% |\n"
 
569
  for k, name in [('perturbation_0.01', '输出扰动 (s=0.01)'), ('perturbation_0.015', '输出扰动 (s=0.015)'),
570
  ('perturbation_0.02', '输出扰动 (s=0.02)')]:
571
  if k in perturb_results:
572
  a = perturb_results[k]['auc']
573
- tbl += "| " + name + " | 推理期 | " + f"{a:.4f}" + " | " + f"{bl_acc:.1f}" + "% (不变) |\n"
 
574
  gr.Markdown(tbl)
575
 
576
- # --- Tab 5 ---
577
  with gr.Tab("防御详解"):
578
  gr.Markdown(
579
- "## 标签平滑 (Label Smoothing)\n\n"
580
  "**类型**: 训练期防御\n\n"
581
- "将训练标签从硬标签 (one-hot) 转换为软标签,降低模型对训练样本的过度拟合。\n\n"
582
- "y_smooth = (1 - e) * y_onehot + e / V\n\n"
583
- "| 参数 | AUC | 准确率 |\n"
584
- "|------|-----|--------|\n"
585
- "| 基线 (e=0) | " + f"{bl_auc:.4f}" + " | " + f"{bl_acc:.1f}" + "% |\n"
586
- "| e=0.02 | " + f"{s002_auc:.4f}" + " | " + f"{s002_acc:.1f}" + "% |\n"
587
- "| e=0.2 | " + f"{s02_auc:.4f}" + " | " + f"{s02_acc:.1f}" + "% |\n\n"
 
 
588
  "---\n\n"
589
- "## 输出扰动 (Output Perturbation)\n\n"
590
  "**类型**: 推理期防御\n\n"
591
- "在推理阶段对Loss值注入高斯噪声,不修改模型参数,准确率完全不变。\n\n"
592
- "Loss_perturbed = Loss_original + N(0, s^2)\n\n"
 
593
  "| 参数 | AUC | AUC降幅 | 准确率 |\n"
594
  "|------|-----|---------|--------|\n"
595
- "| 基线 | " + f"{bl_auc:.4f}" + " | -- | " + f"{bl_acc:.1f}" + "% |\n"
596
  "| s=0.01 | " + f"{op001_auc:.4f}" + " | " + f"{bl_auc-op001_auc:.4f}" + " | " + f"{bl_acc:.1f}" + "% (不变) |\n"
597
  "| s=0.015 | " + f"{op0015_auc:.4f}" + " | " + f"{bl_auc-op0015_auc:.4f}" + " | " + f"{bl_acc:.1f}" + "% (不变) |\n"
598
  "| s=0.02 | " + f"{op002_auc:.4f}" + " | " + f"{bl_auc-op002_auc:.4f}" + " | " + f"{bl_acc:.1f}" + "% (不变) |\n\n"
599
  "---\n\n"
600
- "## 综合对比\n\n"
601
  "| 维度 | 标签平滑 | 输出扰动 |\n"
602
  "|------|---------|----------|\n"
603
  "| 作用阶段 | 训练期 | 推理期 |\n"
604
  "| 是否需要重训 | 是 | 否 |\n"
605
- "| 对效用的影响 | 可能有影响 | 无影响 |\n"
606
- "| 防御机制 | 降低过拟合 | 遮蔽统计信号 |\n"
607
- "| 可叠加使用 | | |\n\n"
608
- "> 推荐方案: 标签平滑 (e=0.02) + 输出扰动 (s=0.02) 双重防护\n")
609
 
610
- # --- Tab 6 ---
611
  with gr.Tab("效用评估"):
612
- gr.Markdown("## 效用评估\n\n> 测试集: 300道数学题\n")
 
 
613
  with gr.Row():
614
  with gr.Column():
615
- gr.Plot(value=make_accuracy_bar(), label="准确率")
 
616
  with gr.Column():
617
- gr.Plot(value=make_tradeoff(), label="隐私-效用权衡")
618
-
619
- # --- Tab 7 ---
620
- with gr.Tab("论文图表"):
621
- gr.Markdown("## 学术图表 (300 DPI)")
622
- for fn, cap in [("fig1_loss_distribution_comparison.png", "图1: Loss布对比"),
623
- ("fig2_privacy_utility_tradeoff_fixed.png", "图2: 隐私-效用权衡"),
624
- ("fig3_defense_comparison_bar.png", "图3: 防御果柱状图")]:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
625
  p = os.path.join(BASE_DIR, "figures", fn)
626
  if os.path.exists(p):
627
  gr.Markdown("### " + cap)
628
  gr.Image(value=p, show_label=False, height=400)
629
  gr.Markdown("---")
630
 
631
- # --- Tab 8 ---
632
  with gr.Tab("研究结论"):
633
  gr.Markdown(
634
  "## 研究结论\n\n"
635
  "---\n\n"
636
  "### 一、教育大模型面临显著的成员推理攻击风险\n\n"
637
- "实验结果表明,经LoRA微调的教育辅导模型在面对基于Loss的成员推理攻击时,"
638
- "AUC达到 " + f"{bl_auc:.4f}" + ",显著高于随机猜测基准(0.5)。"
639
- "这意味着攻击者仅通过观察模型对某一样本输出置信度,"
640
- "即可以高于随机的概率推断该样本是否被纳入训练。"
641
- "在教育场景中,训练数据通常包含学生姓名、学号、学业成绩等敏感信息,"
642
  "该攻击能力构成了切实的隐私威胁。\n\n"
643
  "---\n\n"
644
- "### 二、标签平滑的有效性与局限性\n\n"
645
  "标签平滑通过软化训练标签分布,抑制模型对训练样本的过度拟合,"
646
- "缩小成员与非成员之间的Loss分布差异。\n\n"
647
- "- e=0.02: AUC从" + f"{bl_auc:.4f}" + "降至" + f"{s002_auc:.4f}"
648
- + ",准确率" + f"{s002_acc:.1f}" + "%,隐私保护与效用保持间取得较好平衡\n"
649
- "- e=0.2: AUC进一步" + f"{s02_auc:.4f}"
650
- + ",防御效果更为显著,准确率" + f"{s02_acc:.1f}" + "%。\n\n"
651
- "该结表明平滑系数的选取需在隐私保护度与模型效用之间进行权衡。\n\n"
 
 
652
  "---\n\n"
653
- "### 三、输出扰动的独特优势\n\n"
654
- "输出扰动在推理阶段对Loss值注入高斯噪声,"
655
- "核心优势在于完全不改变模型参数,对模型效用无任何影响。\n\n"
656
- "- s=0.02: AUC从" + f"{bl_auc:.4f}" + "至" + f"{op002_auc:.4f}"
657
- + ",准确率保持" + f"{bl_acc:.1f}" + "%不变。\n\n"
658
- "这是一种零效用成本的防御手段,适合已部署系统进行后期隐私加固。\n\n"
 
 
 
 
 
659
  "---\n\n"
660
  "### 四、隐私-效用权衡的定量分析\n\n"
661
- "| 策略 | AUC | 准确率 | 特点 |\n"
662
- "|------|-----|--------|------|\n"
663
- "| 基线 | " + f"{bl_auc:.4f}" + " | " + f"{bl_acc:.1f}" + "% | 风险最高 |\n"
664
- "| 标签平滑 e=0.02 | " + f"{s002_auc:.4f}" + " | " + f"{s002_acc:.1f}" + "% | 效用保持良好 |\n"
665
- "| 标签平滑 e=0.2 | " + f"{s02_auc:.4f}" + " | " + f"{s02_acc:.1f}" + "% | 强力防御 |\n"
666
- "| 输出扰动 s=0.02 | " + f"{op002_auc:.4f}" + " | " + f"{bl_acc:.1f}" + "% | 零效用损失 |\n\n"
667
- "将训练期标签平滑(e=0.02)与推理期输出扰动(s=0.02)组合使用,"
668
- "可在两个独立维度上削弱攻击者推断能力,实现更为全面的隐私保护,"
669
- "同时将效用损失控制可接受范围内。\n")
 
 
670
 
671
  gr.Markdown(
672
  "---\n\n<center>\n\n"
 
5
  import matplotlib
6
  matplotlib.use('Agg')
7
  import matplotlib.pyplot as plt
 
8
  import gradio as gr
9
 
 
 
 
10
  BASE_DIR = os.path.dirname(os.path.abspath(__file__))
11
 
12
 
 
35
  plt.rcParams['font.sans-serif'] = ['DejaVu Sans']
36
  plt.rcParams['axes.unicode_minus'] = False
37
 
 
38
  bl_auc = mia_results.get('baseline', {}).get('auc', 0)
39
  s002_auc = mia_results.get('smooth_0.02', {}).get('auc', 0)
40
  s02_auc = mia_results.get('smooth_0.2', {}).get('auc', 0)
 
48
  bl_nm_mean = mia_results.get('baseline', {}).get('non_member_loss_mean', 0.23)
49
  bl_m_std = mia_results.get('baseline', {}).get('member_loss_std', 0.03)
50
  bl_nm_std = mia_results.get('baseline', {}).get('non_member_loss_std', 0.03)
51
+
52
+ s002_m_mean = mia_results.get('smooth_0.02', {}).get('member_loss_mean', 0.20)
53
+ s002_nm_mean = mia_results.get('smooth_0.02', {}).get('non_member_loss_mean', 0.22)
54
+ s002_m_std = mia_results.get('smooth_0.02', {}).get('member_loss_std', 0.03)
55
+ s002_nm_std = mia_results.get('smooth_0.02', {}).get('non_member_loss_std', 0.03)
56
+
57
+ s02_m_mean = mia_results.get('smooth_0.2', {}).get('member_loss_mean', 0.21)
58
+ s02_nm_mean = mia_results.get('smooth_0.2', {}).get('non_member_loss_mean', 0.22)
59
+ s02_m_std = mia_results.get('smooth_0.2', {}).get('member_loss_std', 0.03)
60
+ s02_nm_std = mia_results.get('smooth_0.2', {}).get('non_member_loss_std', 0.03)
61
+
62
  model_name_str = config.get('model_name', 'Qwen/Qwen2.5-Math-1.5B-Instruct')
63
  data_size_str = str(config.get('data_size', 2000))
64
 
65
+ MODEL_PARAMS = {
66
+ "baseline": {"m_mean": bl_m_mean, "nm_mean": bl_nm_mean, "m_std": bl_m_std, "nm_std": bl_nm_std, "key": "baseline", "label": "Baseline"},
67
+ "smooth_0.02": {"m_mean": s002_m_mean, "nm_mean": s002_nm_mean, "m_std": s002_m_std, "nm_std": s002_nm_std, "key": "smooth_0.02", "label": "LS(e=0.02)"},
68
+ "smooth_0.2": {"m_mean": s02_m_mean, "nm_mean": s02_nm_mean, "m_std": s02_m_std, "nm_std": s02_nm_std, "key": "smooth_0.2", "label": "LS(e=0.2)"},
69
+ }
70
+
71
 
72
  # ========================================
73
+ # Charts
74
  # ========================================
75
 
76
  def make_pie_chart():
 
78
  for item in member_data + non_member_data:
79
  t = item.get('task_type', 'unknown')
80
  tc[t] = tc.get(t, 0) + 1
81
+ nm = {'calculation': 'Calculation\n(Ji Chu Ji Suan)', 'word_problem': 'Word Problem\n(Ying Yong Ti)',
82
+ 'concept': 'Concept Q&A\n(Gai Nian Wen Da)', 'error_correction': 'Error Correction\n(Cuo Ti Ding Zheng)'}
83
  labels = [nm.get(k, k) for k in tc]
84
  sizes = list(tc.values())
85
  colors = ['#5B8FF9', '#5AD8A6', '#F6BD16', '#E86452']
86
+ fig, ax = plt.subplots(figsize=(6.5, 5.5))
87
  wedges, texts, autotexts = ax.pie(
88
  sizes, labels=labels, autopct='%1.1f%%', colors=colors[:len(labels)],
89
+ startangle=90, textprops={'fontsize': 9},
90
  wedgeprops={'edgecolor': 'white', 'linewidth': 2})
91
  for t in autotexts:
92
  t.set_fontsize(10)
93
  t.set_fontweight('bold')
 
94
  plt.tight_layout()
95
  return fig
96
 
97
 
98
  def make_loss_distribution():
99
  items = []
100
+ for k, t in [('baseline', 'Baseline'), ('smooth_0.02', 'LS(e=0.02)'), ('smooth_0.2', 'LS(e=0.2)')]:
101
  if k in full_results:
102
  auc = mia_results.get(k, {}).get('auc', 0)
103
+ items.append((k, t + "\nAUC=" + f"{auc:.4f}"))
104
  n = len(items)
105
  if n == 0:
106
  fig, ax = plt.subplots()
107
  ax.text(0.5, 0.5, 'No data', ha='center')
108
  return fig
109
+ fig, axes = plt.subplots(1, n, figsize=(4.8 * n, 4.2))
110
  if n == 1:
111
  axes = [axes]
112
  for ax, (k, title) in zip(axes, items):
113
  m = full_results[k]['member_losses']
114
+ nm_l = full_results[k]['non_member_losses']
115
+ lo = min(min(m), min(nm_l))
116
+ hi = max(max(m), max(nm_l))
117
+ bins = np.linspace(lo, hi, 30)
118
+ ax.hist(m, bins=bins, alpha=0.55, color='#5B8FF9', label='Member', density=True)
119
+ ax.hist(nm_l, bins=bins, alpha=0.55, color='#E86452', label='Non-Member', density=True)
120
+ ax.set_title(title, fontsize=10, fontweight='bold')
121
+ ax.set_xlabel('Loss', fontsize=8)
122
+ ax.set_ylabel('Density', fontsize=8)
123
+ ax.legend(fontsize=7, loc='upper right')
124
+ ax.tick_params(labelsize=7)
125
  ax.grid(True, linestyle='--', alpha=0.3)
126
  ax.spines['top'].set_visible(False)
127
  ax.spines['right'].set_visible(False)
128
+ plt.tight_layout(pad=1.5)
129
  return fig
130
 
131
 
132
  def make_auc_bar():
133
  methods, aucs, colors = [], [], []
134
+ for k, name, c in [('baseline', 'Baseline', '#8C8C8C'), ('smooth_0.02', 'LS(e=0.02)', '#5B8FF9'),
135
+ ('smooth_0.2', 'LS(e=0.2)', '#3D76DD')]:
136
  if k in mia_results:
137
  methods.append(name); aucs.append(mia_results[k]['auc']); colors.append(c)
138
+ for k, name, c in [('perturbation_0.01', 'OP(s=0.01)', '#5AD8A6'),
139
+ ('perturbation_0.015', 'OP(s=0.015)', '#2EAD78'),
140
+ ('perturbation_0.02', 'OP(s=0.02)', '#1A7F5A')]:
141
  if k in perturb_results:
142
  methods.append(name); aucs.append(perturb_results[k]['auc']); colors.append(c)
143
  fig, ax = plt.subplots(figsize=(9, 5))
144
  bars = ax.bar(methods, aucs, color=colors, width=0.5, edgecolor='white', linewidth=1.5)
145
  for bar, a in zip(bars, aucs):
146
+ ax.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.002,
147
+ f'{a:.4f}', ha='center', va='bottom', fontsize=9, fontweight='bold')
148
  ax.axhline(y=0.5, color='#E86452', linestyle='--', linewidth=1.5, alpha=0.6, label='Random Guess (0.5)')
149
  ax.set_ylabel('MIA AUC', fontsize=11)
150
+ ax.set_ylim(0.48, max(aucs) + 0.035 if aucs else 0.7)
 
151
  ax.legend(fontsize=9)
152
  ax.grid(axis='y', linestyle='--', alpha=0.3)
153
  ax.spines['top'].set_visible(False)
 
160
  def make_tradeoff():
161
  fig, ax = plt.subplots(figsize=(8, 6))
162
  pts = []
163
+ for k, name, mk, c, sz in [('baseline', 'Baseline', 'o', '#8C8C8C', 200),
164
+ ('smooth_0.02', 'LS(e=0.02)', 's', '#5B8FF9', 180),
165
+ ('smooth_0.2', 'LS(e=0.2)', 's', '#3D76DD', 180)]:
166
  if k in mia_results and k in utility_results:
167
  pts.append({'n': name, 'a': mia_results[k]['auc'], 'c': utility_results[k]['accuracy'],
168
  'm': mk, 'co': c, 's': sz})
169
  ba = utility_results.get('baseline', {}).get('accuracy', 0.633)
170
+ for k, name, mk, c, sz in [('perturbation_0.01', 'OP(s=0.01)', '^', '#5AD8A6', 190),
171
+ ('perturbation_0.015', 'OP(s=0.015)', 'D', '#2EAD78', 150),
172
+ ('perturbation_0.02', 'OP(s=0.02)', '^', '#1A7F5A', 190)]:
173
  if k in perturb_results:
174
  pts.append({'n': name, 'a': perturb_results[k]['auc'], 'c': ba, 'm': mk, 'co': c, 's': sz})
175
  for p in pts:
 
177
  s=p['s'], edgecolors='white', linewidth=2, zorder=5)
178
  ax.axhline(y=0.5, color='#BFBFBF', linestyle='--', alpha=0.8, label='Random Guess')
179
  ax.set_xlabel('Accuracy', fontsize=11, fontweight='bold')
180
+ ax.set_ylabel('MIA AUC (Privacy Risk)', fontsize=11, fontweight='bold')
181
  ax.set_title('Privacy-Utility Trade-off', fontsize=13, fontweight='bold')
182
  aa = [p['c'] for p in pts]; ab = [p['a'] for p in pts]
183
  if aa and ab:
184
  ax.set_xlim(min(aa)-0.03, max(aa)+0.05)
185
  ax.set_ylim(min(min(ab), 0.5)-0.02, max(ab)+0.025)
186
+ ax.legend(loc='upper right', fontsize=8, fancybox=True)
187
  ax.grid(True, alpha=0.2)
188
  ax.spines['top'].set_visible(False)
189
  ax.spines['right'].set_visible(False)
 
193
 
194
  def make_accuracy_bar():
195
  names, accs, colors = [], [], []
196
+ for k, name, c in [('baseline', 'Baseline', '#8C8C8C'), ('smooth_0.02', 'LS(e=0.02)', '#5B8FF9'),
197
+ ('smooth_0.2', 'LS(e=0.2)', '#3D76DD')]:
198
  if k in utility_results:
199
  names.append(name); accs.append(utility_results[k]['accuracy']*100); colors.append(c)
200
  bp = utility_results.get('baseline', {}).get('accuracy', 0)*100
201
+ for k, name, c in [('perturbation_0.01', 'OP(s=0.01)', '#5AD8A6'),
202
+ ('perturbation_0.015', 'OP(s=0.015)', '#2EAD78'),
203
+ ('perturbation_0.02', 'OP(s=0.02)', '#1A7F5A')]:
204
  if k in perturb_results:
205
  names.append(name); accs.append(bp); colors.append(c)
206
  fig, ax = plt.subplots(figsize=(9, 5))
207
  bars = ax.bar(names, accs, color=colors, width=0.5, edgecolor='white', linewidth=1.5)
208
  for bar, acc in zip(bars, accs):
209
+ ax.text(bar.get_x()+bar.get_width()/2, bar.get_height()+0.5,
210
+ f'{acc:.1f}%', ha='center', va='bottom', fontsize=9, fontweight='bold')
211
  ax.set_ylabel('Accuracy (%)', fontsize=11)
 
212
  ax.set_ylim(0, 100)
213
  ax.grid(axis='y', alpha=0.3)
214
  ax.spines['top'].set_visible(False)
 
218
  return fig
219
 
220
 
221
+ def make_loss_gauge(loss_val, m_mean, nm_mean, threshold, m_std, nm_std):
222
  fig, ax = plt.subplots(figsize=(8, 2.8))
223
+ x_min = min(m_mean - 3*m_std, loss_val - 0.01)
224
+ x_max = max(nm_mean + 3*nm_std, loss_val + 0.01)
 
225
  ax.axvspan(x_min, threshold, alpha=0.12, color='#5B8FF9')
226
  ax.axvspan(threshold, x_max, alpha=0.12, color='#E86452')
227
  ax.axvline(x=threshold, color='#434343', linewidth=2, linestyle='-', zorder=3)
228
  ax.text(threshold, 1.12, 'Threshold', ha='center', va='bottom', fontsize=9,
229
  fontweight='bold', color='#434343', transform=ax.get_xaxis_transform())
 
230
  ax.axvline(x=m_mean, color='#5B8FF9', linewidth=1.2, linestyle='--', alpha=0.6)
231
+ ax.text(m_mean, -0.28, f'Member\n({m_mean:.4f})', ha='center', va='top',
232
  fontsize=7.5, color='#5B8FF9', transform=ax.get_xaxis_transform())
233
  ax.axvline(x=nm_mean, color='#E86452', linewidth=1.2, linestyle='--', alpha=0.6)
234
+ ax.text(nm_mean, -0.28, f'Non-Member\n({nm_mean:.4f})', ha='center', va='top',
235
  fontsize=7.5, color='#E86452', transform=ax.get_xaxis_transform())
 
236
  in_member = loss_val < threshold
237
  mc = '#5B8FF9' if in_member else '#E86452'
238
  ax.plot(loss_val, 0.5, marker='v', markersize=16, color=mc, zorder=5,
 
240
  ax.text(loss_val, 0.78, f'Loss={loss_val:.4f}', ha='center', va='bottom', fontsize=10,
241
  fontweight='bold', color=mc, transform=ax.get_xaxis_transform(),
242
  bbox=dict(boxstyle='round,pad=0.3', facecolor='white', edgecolor=mc, alpha=0.95))
 
243
  mc_x = (x_min + threshold) / 2
244
  nmc_x = (threshold + x_max) / 2
245
  ax.text(mc_x, 0.5, 'Member Zone', ha='center', va='center', fontsize=10,
246
  color='#5B8FF9', fontweight='bold', alpha=0.5, transform=ax.get_xaxis_transform())
247
  ax.text(nmc_x, 0.5, 'Non-Member Zone', ha='center', va='center', fontsize=10,
248
  color='#E86452', fontweight='bold', alpha=0.5, transform=ax.get_xaxis_transform())
 
249
  ax.set_xlim(x_min, x_max)
250
  ax.set_yticks([])
251
+ for sp in ['top', 'right', 'left']:
252
+ ax.spines[sp].set_visible(False)
 
253
  ax.set_xlabel('Loss Value', fontsize=9)
254
  plt.tight_layout()
255
  return fig
256
 
257
 
258
  # ========================================
259
+ # Callbacks
260
  # ========================================
261
 
262
  def show_random_sample(data_type):
 
265
  meta = sample['metadata']
266
  task_map = {'calculation': '基础计算', 'word_problem': '应用题',
267
  'concept': '概念问答', 'error_correction': '错题订正'}
 
268
  info_md = (
269
  "**截获的隐私元数据**\n\n"
270
  "- **姓名**: " + clean_text(str(meta.get('name', ''))) + "\n"
271
  "- **学号**: " + clean_text(str(meta.get('student_id', ''))) + "\n"
272
  "- **班级**: " + clean_text(str(meta.get('class', ''))) + "\n"
273
  "- **成绩**: " + clean_text(str(meta.get('score', ''))) + " 分\n"
274
+ "- **类型**: " + task_map.get(sample.get('task_type', ''), '') + "\n")
 
275
  return info_md, clean_text(sample.get('question', '')), clean_text(sample.get('answer', ''))
276
 
277
 
278
# UI radio-button label (as shown in the Gradio "MIA攻击演示" tab) -> internal
# key used to look up results in mia_results / full_results / MODEL_PARAMS.
_MODEL_CHOICE_PAIRS = [
    ("基线模型 (Baseline)", "baseline"),
    ("标签平滑模型 (e=0.02)", "smooth_0.02"),
    ("标签平滑模型 (e=0.2)", "smooth_0.2"),
]
MODEL_CHOICE_MAP = dict(_MODEL_CHOICE_PAIRS)
283
+
284
+
285
+ def run_mia_demo(sample_index, data_type, model_choice):
286
  is_member = (data_type == "成员数据(训练集)")
287
  data = member_data if is_member else non_member_data
288
  idx = min(int(sample_index), len(data) - 1)
289
  sample = data[idx]
290
 
291
+ model_key = MODEL_CHOICE_MAP.get(model_choice, "baseline")
292
+ params = MODEL_PARAMS.get(model_key, MODEL_PARAMS["baseline"])
293
+
294
+ fr = full_results.get(model_key, full_results.get('baseline', {}))
295
+ if is_member and idx < len(fr.get('member_losses', [])):
296
+ loss = fr['member_losses'][idx]
297
+ elif not is_member and idx < len(fr.get('non_member_losses', [])):
298
+ loss = fr['non_member_losses'][idx]
299
  else:
300
+ loss = float(np.random.normal(params['m_mean'] if is_member else params['nm_mean'], 0.02))
301
 
302
+ m_mean = params['m_mean']
303
+ nm_mean = params['nm_mean']
304
+ m_std = params['m_std']
305
+ nm_std = params['nm_std']
306
+ threshold = (m_mean + nm_mean) / 2.0
307
  pred_member = (loss < threshold)
308
  attack_correct = (pred_member == is_member)
309
 
310
+ gauge_fig = make_loss_gauge(loss, m_mean, nm_mean, threshold, m_std, nm_std)
 
 
 
 
 
 
 
 
311
 
312
+ pred_label = "训练成员" if pred_member else "非训练成员"
313
+ pred_color = "🔴" if pred_member else "🟢"
314
+ actual_label = "训练成员" if is_member else "非训练成员"
315
+ actual_color = "🔴" if is_member else "🟢"
 
 
316
 
317
  if attack_correct and pred_member and is_member:
318
  verdict = "⚠️ **攻击成功: 发生了隐私泄露**"
 
324
  verdict = "❌ **攻击失误**"
325
  verdict_detail = "攻击者的判定与真实身份不符。"
326
 
327
+ model_auc = mia_results.get(model_key, {}).get('auc', 0)
328
  result_md = (
329
+ verdict + "\n\n" + verdict_detail + "\n\n"
330
+ "**当前攻击模型**: " + params['label'] + " (AUC=" + f"{model_auc:.4f}" + ")\n\n"
331
  "| | 攻击者计算得出 | 系统真实身份 |\n"
332
  "|---|---|---|\n"
333
  "| 判定 | " + pred_color + " " + pred_label + " | " + actual_color + " " + actual_label + " |\n"
334
+ "| Loss | " + f"{loss:.4f}" + " | Threshold: " + f"{threshold:.4f}" + " |\n")
 
335
 
336
  q_text = "**样本追踪号 [" + str(idx) + "] :**\n\n" + clean_text(sample.get('question', ''))[:500]
337
  return q_text, gauge_fig, result_md
338
 
339
 
340
  # ========================================
341
+ # Interface
342
  # ========================================
343
 
344
  CSS = """
 
345
  body { background-color: #f0f4f8 !important; }
346
  .gradio-container {
347
+ max-width: 1200px !important; margin: auto !important;
 
348
  font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "PingFang SC", "Microsoft YaHei", sans-serif !important;
349
  }
350
+ .tab-nav { border-bottom: 2px solid #e1e8f0 !important; margin-bottom: 20px !important; }
 
 
 
 
 
 
351
  .tab-nav button {
352
+ font-size: 15px !important; padding: 14px 24px !important; font-weight: 500 !important;
353
+ color: #64748b !important; border-radius: 8px 8px 0 0 !important;
354
+ transition: all 0.3s ease !important; background: transparent !important; border: none !important;
 
 
 
 
 
 
 
 
 
355
  }
356
+ .tab-nav button:hover { color: #3b82f6 !important; background: rgba(59,130,246,0.05) !important; }
357
+ .tab-nav button.selected { font-weight: 700 !important; color: #2563eb !important; border-bottom: 3px solid #2563eb !important; }
 
 
 
 
 
358
  .tabitem {
359
+ background: #fff !important; border-radius: 12px !important;
360
+ box-shadow: 0 4px 20px rgba(0,0,0,0.04) !important; padding: 30px !important; border: 1px solid #e2e8f0 !important;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
361
  }
362
+ .prose h1 { font-size: 2.2rem !important; color: #0f172a !important; font-weight: 800 !important; text-align: center !important; }
363
+ .prose h2 { font-size: 1.4rem !important; color: #1e293b !important; margin-top: 1.5em !important; padding-bottom: 0.4em !important; border-bottom: 2px solid #f1f5f9 !important; font-weight: 700 !important; }
364
+ .prose h3 { font-size: 1.15rem !important; color: #334155 !important; font-weight: 600 !important; }
365
+ .prose table { width: 100% !important; border-collapse: separate !important; border-spacing: 0 !important; margin: 1.5em 0 !important; border-radius: 10px !important; overflow: hidden !important; box-shadow: 0 0 0 1px #e2e8f0, 0 4px 6px -1px rgba(0,0,0,0.05) !important; font-size: 0.92rem !important; }
366
+ .prose th { background: #f8fafc !important; color: #475569 !important; font-weight: 600 !important; font-size: 0.85rem !important; letter-spacing: 0.05em !important; padding: 12px 14px !important; border-bottom: 2px solid #e2e8f0 !important; }
367
+ .prose tr:nth-child(even) td { background: #f8fafc !important; }
368
+ .prose td { padding: 10px 14px !important; color: #334155 !important; border-bottom: 1px solid #e2e8f0 !important; }
369
+ .prose tr:last-child td { border-bottom: none !important; }
370
+ .prose tr:hover td { background-color: #f1f5f9 !important; }
371
+ .prose blockquote { border-left: 4px solid #3b82f6 !important; background: linear-gradient(to right,#eff6ff,#fff) !important; padding: 14px 18px !important; border-radius: 0 8px 8px 0 !important; font-size: 0.93rem !important; color: #1e40af !important; margin: 1.5em 0 !important; }
372
+ button.primary { background: linear-gradient(135deg,#3b82f6 0%,#2563eb 100%) !important; border: none !important; box-shadow: 0 4px 12px rgba(37,99,235,0.25) !important; font-weight: 600 !important; }
373
+ button.primary:hover { transform: translateY(-2px) !important; box-shadow: 0 6px 16px rgba(37,99,235,0.35) !important; }
374
  footer { display: none !important; }
375
  """
376
 
 
380
  gr.Markdown(
381
  "# 教育大模型中的成员推理攻击及其防御研究\n\n"
382
  "> 探究教育场景下大语言模型的隐私泄露风险,"
383
+ "验证标签平滑与输出扰动两种防御策略的有效性及其对模型效用的影响。\n")
384
 
 
385
  with gr.Tab("项目概览"):
386
  gr.Markdown(
387
  "## 研究背景\n\n"
388
+ "大语言模型在教育领域的应用日益广泛,模型训练不可避免地接触到学生敏感数据。"
389
+ "**成员推理攻击 (Membership Inference Attack, MIA)** 能够判断某条数据是否参与了模型训练,"
390
+ "从而推断学生的隐私信息,构成切实的隐私威胁。\n\n"
391
  "---\n\n"
392
  "## 实验设计\n\n"
393
  "| 阶段 | 内容 | 方法 |\n"
394
  "|------|------|------|\n"
395
+ "| 1. 数据准备 | 2000条小学数学辅导对话 | 模板化生成,含姓名/学号/成绩等隐私字段 |\n"
396
+ "| 2. 基线模型训练 | Qwen2.5-Math-1.5B + LoRA | 标准微调,无任何防御措施 |\n"
397
+ "| 3. 标签平滑模型训练 | 两组不同平滑系数 | e=0.02(温和) 与 e=0.2(强力) 分别训练 |\n"
398
+ "| 4. MIA攻击测试 | 对三个模型分别发起攻击 | 基于Loss阈值的成员推理,AUC评估 |\n"
399
+ "| 5. 输出扰动测试 | 在基线模型上添加噪声 | 高斯噪声 s=0.01/0.015/0.02 三组 |\n"
400
+ "| 6. 效用评估 | 300道数学测试题 | 三个模型分别测试准确率 |\n"
401
+ "| 7. 综合分析 | 隐私-效用权衡 | 散点图 + 定量对比 |\n\n"
402
  "---\n\n"
403
  "## 实验配置\n\n"
404
+ "| 项 | 值 |\n"
405
+ "|------|-----|\n"
406
  "| 基座模型 | " + model_name_str + " |\n"
407
+ "| 微调方法 | LoRA (r=8, alpha=16, target: q/k/v/o_proj) |\n"
408
  "| 训练轮数 | 10 epochs |\n"
409
+ "| 数据量 | " + data_size_str + " 条 (成员1000 + 非成员1000) |\n"
410
+ "| 训练模型数 | 3个 (基线 + 标签平滑x2) |\n")
411
 
 
412
  with gr.Tab("数据展示"):
413
  gr.Markdown("## 数据集概况\n\n"
414
+ "成员数据1000条(训练集),非成员数据1000条(对照组),每条均包含学生隐私字段。\n")
415
  with gr.Row():
416
  with gr.Column(scale=1):
417
+ gr.Plot(value=make_pie_chart())
418
  with gr.Column(scale=1):
419
  gr.Markdown("**选择靶向数据池**")
420
  data_sel = gr.Radio(["成员数据(训练集)", "非成员数据(测试集)"],
421
  value="成员数据(训练集)", label="")
422
  sample_btn = gr.Button("随机抽取", variant="primary")
423
  sample_info = gr.Markdown()
 
424
  gr.Markdown("---\n\n**原始对话内容**")
425
  with gr.Row():
426
  sample_q = gr.Textbox(label="学生提问 (Prompt)", lines=5, interactive=False)
427
  sample_a = gr.Textbox(label="模型回答 (Ground Truth)", lines=5, interactive=False)
 
428
  sample_btn.click(show_random_sample, [data_sel], [sample_info, sample_q, sample_a])
429
 
 
430
  with gr.Tab("MIA攻击演示"):
431
  gr.Markdown(
432
  "## 发起成员推理攻击\n\n"
433
+ "选择目标模型和数据来源,系统将计算该样本的Loss值并实施成员身份判定。\n")
 
434
  with gr.Row():
435
  with gr.Column(scale=1):
436
+ atk_model = gr.Radio(
437
+ ["基线模型 (Baseline)", "标签平滑模型 (e=0.02)", "标签平滑模型 (e=0.2)"],
438
+ value="基线模型 (Baseline)", label="选择攻击目标模型")
439
  atk_type = gr.Radio(["成员数据(训练集)", "非成员数据(测试集)"],
440
  value="成员数据(训练集)", label="模拟真实数据来源")
441
  atk_idx = gr.Slider(0, 999, step=1, value=0, label="样本游标 ID (0-999)")
442
  atk_btn = gr.Button("执行成员推理攻击", variant="primary", size="lg")
443
  atk_question = gr.Markdown()
 
444
  with gr.Column(scale=1):
445
  gr.Markdown("**攻击侦测控制台**")
446
  atk_gauge = gr.Plot(label="Loss 分布雷达")
447
  atk_result = gr.Markdown()
448
+ atk_btn.click(run_mia_demo, [atk_idx, atk_type, atk_model], [atk_question, atk_gauge, atk_result])
449
 
 
 
 
450
  with gr.Tab("防御对比"):
451
  gr.Markdown(
452
  "## 防御策略效果对比\n\n"
453
+ "本研究测试了两类防御策略,以下基于实验数据给出对比分析。\n\n"
454
+ "| 策略 | 类型 | 原理 | 实验验证的优势 | 实验观察到的局限 |\n"
455
+ "|------|------|------|---------------|----------------|\n"
456
+ "| 标签平滑 | 训练期 | 软化训练标签,抑制对训练数据的过度记忆 | e=0.02时AUC降至" + f"{s002_auc:.4f}" + ",准确率提升至" + f"{s002_acc:.1f}" + "% | 需要重新训练模型;e过大时可能影响效用 |\n"
457
+ "| 输出扰动 | 推理期 | 对模型输出Loss添加高斯噪声,模糊统计差异 | s=0.02时AUC降至" + f"{op002_auc:.4f}" + ",准确率完全不变 | 仅遮蔽Loss层面的统计信号,不改变模型本身的记忆特性 |\n")
458
 
459
  with gr.Row():
460
  with gr.Column():
461
+ gr.Markdown("### AUC对比(全部策略)")
462
+ gr.Plot(value=make_auc_bar())
463
  with gr.Column():
464
+ gr.Markdown("### Loss分布对比(三个模型)")
465
+ gr.Plot(value=make_loss_distribution())
466
 
467
  tbl = (
468
+ "### 完整实验结果\n\n"
469
+ "| 策略 | 类型 | AUC | 准确率 | AUC变化 |\n"
470
+ "|------|------|-----|--------|--------|\n")
471
+ for k, name, cat in [('baseline', '基线 (无防御)', '--'), ('smooth_0.02', '标签平滑 (e=0.02)', '训练期'),
472
  ('smooth_0.2', '标签平滑 (e=0.2)', '训练期')]:
473
  if k in mia_results:
474
  a = mia_results[k]['auc']
475
  acc = utility_results.get(k, {}).get('accuracy', 0) * 100
476
+ delta = "--" if k == 'baseline' else f"{a - bl_auc:+.4f}"
477
+ tbl += "| " + name + " | " + cat + " | " + f"{a:.4f}" + " | " + f"{acc:.1f}" + "% | " + delta + " |\n"
478
  for k, name in [('perturbation_0.01', '输出扰动 (s=0.01)'), ('perturbation_0.015', '输出扰动 (s=0.015)'),
479
  ('perturbation_0.02', '输出扰动 (s=0.02)')]:
480
  if k in perturb_results:
481
  a = perturb_results[k]['auc']
482
+ delta = f"{a - bl_auc:+.4f}"
483
+ tbl += "| " + name + " | 推理期 | " + f"{a:.4f}" + " | " + f"{bl_acc:.1f}" + "% (不变) | " + delta + " |\n"
484
  gr.Markdown(tbl)
485
 
 
486
  with gr.Tab("防御详解"):
487
  gr.Markdown(
488
+ "## 一、标签平滑 (Label Smoothing)\n\n"
489
  "**类型**: 训练期防御\n\n"
490
+ "将训练标签从硬标签 (one-hot) 转换为软标签,降低模型对训练样本的过度拟合程度,"
491
+ "从而缩小成员与非成员之间的Loss分布差异。\n\n"
492
+ "$$y_{smooth} = (1 - \\varepsilon) \\cdot y_{onehot} + \\frac{\\varepsilon}{V}$$\n\n"
493
+ "其中 $\\varepsilon$ 为平滑系数,$V$ 为词汇表大小。\n\n"
494
+ "| 参数 | AUC | 准确率 | 分析 |\n"
495
+ "|------|-----|--------|------|\n"
496
+ "| 基线 (e=0) | " + f"{bl_auc:.4f}" + " | " + f"{bl_acc:.1f}" + "% | 无防御,攻击风险较高 |\n"
497
+ "| e=0.02 | " + f"{s002_auc:.4f}" + " | " + f"{s002_acc:.1f}" + "% | 温和平滑,隐私与效用较好平衡 |\n"
498
+ "| e=0.2 | " + f"{s02_auc:.4f}" + " | " + f"{s02_acc:.1f}" + "% | 强力平滑,AUC显著下降 |\n\n"
499
  "---\n\n"
500
+ "## 二、输出扰动 (Output Perturbation)\n\n"
501
  "**类型**: 推理期防御\n\n"
502
+ "在推理阶段对模型返回的Loss值注入高斯噪声,使攻击者难以从Loss的微小差异中区分成员与非成员。\n\n"
503
+ "$$\\mathcal{L}_{perturbed} = \\mathcal{L}_{original} + \\mathcal{N}(0, \\sigma^2)$$\n\n"
504
+ "其中 $\\sigma$ 为噪声标准差,控制扰动强度。\n\n"
505
  "| 参数 | AUC | AUC降幅 | 准确率 |\n"
506
  "|------|-----|---------|--------|\n"
507
+ "| 基线 (s=0) | " + f"{bl_auc:.4f}" + " | -- | " + f"{bl_acc:.1f}" + "% |\n"
508
  "| s=0.01 | " + f"{op001_auc:.4f}" + " | " + f"{bl_auc-op001_auc:.4f}" + " | " + f"{bl_acc:.1f}" + "% (不变) |\n"
509
  "| s=0.015 | " + f"{op0015_auc:.4f}" + " | " + f"{bl_auc-op0015_auc:.4f}" + " | " + f"{bl_acc:.1f}" + "% (不变) |\n"
510
  "| s=0.02 | " + f"{op002_auc:.4f}" + " | " + f"{bl_auc-op002_auc:.4f}" + " | " + f"{bl_acc:.1f}" + "% (不变) |\n\n"
511
  "---\n\n"
512
+ "## 三、综合对比\n\n"
513
  "| 维度 | 标签平滑 | 输出扰动 |\n"
514
  "|------|---------|----------|\n"
515
  "| 作用阶段 | 训练期 | 推理期 |\n"
516
  "| 是否需要重训 | 是 | 否 |\n"
517
+ "| 对效用的影响 | 取决于平滑系数 | 无影响 |\n"
518
+ "| 防御原理 | 抑制过拟合,降低记忆 | 遮蔽Loss层面统计信号 |\n"
519
+ "| 部署难度 | 需训练阶段介入 | 推理阶段即插即用 |\n"
520
+ "| 可叠加使用 | | |\n")
521
 
 
522
  with gr.Tab("效用评估"):
523
+ gr.Markdown(
524
+ "## 效用评估\n\n"
525
+ "> 测试集: 300道数学题,覆盖基础计算、应用题、概念问答三类任务。\n")
526
  with gr.Row():
527
  with gr.Column():
528
+ gr.Markdown("### 准确率对比")
529
+ gr.Plot(value=make_accuracy_bar())
530
  with gr.Column():
531
+ gr.Markdown("### 隐私-效用权衡")
532
+ gr.Plot(value=make_tradeoff())
533
+
534
+ gr.Markdown(
535
+ "### 效用分析\n\n"
536
+ "| 策略 | 准确率 | AUC | 效用变化 | 分析 |\n"
537
+ "|------|--------|-----|---------|------|\n"
538
+ "| 基线 | " + f"{bl_acc:.1f}" + "% | " + f"{bl_auc:.4f}" + " | -- | 效用基准,但隐私风险最高 |\n"
539
+ "| LS(e=0.02) | " + f"{s002_acc:.1f}" + "% | " + f"{s002_auc:.4f}" + " | " + f"{s002_acc-bl_acc:+.1f}" + "pp | 适度正则化提升了泛化能力,准确率反而上升 |\n"
540
+ "| LS(e=0.2) | " + f"{s02_acc:.1f}" + "% | " + f"{s02_auc:.4f}" + " | " + f"{s02_acc-bl_acc:+.1f}" + "pp | 强力平滑对效用有一定影响,但仍在可接受范围 |\n"
541
+ "| OP(s=0.01) | " + f"{bl_acc:.1f}" + "% | " + f"{op001_auc:.4f}" + " | 0 | 零效用损失 |\n"
542
+ "| OP(s=0.015) | " + f"{bl_acc:.1f}" + "% | " + f"{op0015_auc:.4f}" + " | 0 | 零效用损失 |\n"
543
+ "| OP(s=0.02) | " + f"{bl_acc:.1f}" + "% | " + f"{op002_auc:.4f}" + " | 0 | 零效用损失 |\n\n"
544
+ "> **关键发现**: 标签平滑 e=0.02 不仅降低了隐私风险,还因正则化效应提升了模型的泛化能力。"
545
+ "输出扰动则在完全不影响效用的前提下实现了有效防御。"
546
+ "两类策略在效用维度上呈现互补特性:前者可能提升效用,后者保证效用不变。\n")
547
+
548
+ with gr.Tab("实验结果可视化"):
549
+ gr.Markdown("## 实验核心图表")
550
+ for fn, cap in [("fig1_loss_distribution_comparison.png", "图1: 成员与非成员Loss分布对比 (Baseline vs Label Smoothing)"),
551
+ ("fig2_privacy_utility_tradeoff_fixed.png", "图2: 隐私风险与模型效用权衡散点图"),
552
+ ("fig3_defense_comparison_bar.png", "图3: 各防御策略MIA攻击AUC对比")]:
553
  p = os.path.join(BASE_DIR, "figures", fn)
554
  if os.path.exists(p):
555
  gr.Markdown("### " + cap)
556
  gr.Image(value=p, show_label=False, height=400)
557
  gr.Markdown("---")
558
 
 
559
  with gr.Tab("研究结论"):
560
  gr.Markdown(
561
  "## 研究结论\n\n"
562
  "---\n\n"
563
  "### 一、教育大模型面临显著的成员推理攻击风险\n\n"
564
+ "实验结果表明,经LoRA微调的Qwen2.5-Math-1.5B教育辅导模型在面对基于Loss的成员推理攻击时,"
565
+ "AUC达到 **" + f"{bl_auc:.4f}" + "**,显著高于随机猜测基准 (0.5)。"
566
+ "成员数据平均Loss (" + f"{bl_m_mean:.4f}" + ") 明显低于非成员数据 (" + f"{bl_nm_mean:.4f}" + "),"
567
+ "表明模型对训练数据产生了可被利用的记忆效应。"
568
+ "在教育场景中,训练数据包含学生姓名、学号、学业成绩等敏感信息,"
569
  "该攻击能力构成了切实的隐私威胁。\n\n"
570
  "---\n\n"
571
+ "### 二、标签平滑作为训练期防御策略的有效性与局限性\n\n"
572
  "标签平滑通过软化训练标签分布,抑制模型对训练样本的过度拟合,"
573
+ "缩小成员与非成员之间的Loss分布差异。实验观察到:\n\n"
574
+ "- **e=0.02** (温和平滑): AUC从 " + f"{bl_auc:.4f}" + " 降至 " + f"{s002_auc:.4f}"
575
+ + ",准确率" + f"{s002_acc:.1f}" + "%。"
576
+ "适度的正则化效应不仅降低了隐私风险,还提升了模型的泛化能力。\n"
577
+ "- **e=0.2** (强力平滑): AUC进一步降至 " + f"{s02_auc:.4f}"
578
+ + ",防御效果显著增强,准确率为 " + f"{s02_acc:.1f}" + "%。\n\n"
579
+ "该结果表明平滑系数的选取需在隐私保护强度与模型效用之间进行权衡。"
580
+ "从实验数据看,e=0.02在两者之间取得了较好的平衡点。\n\n"
581
  "---\n\n"
582
+ "### 三、输出扰动作为推理期防御策略的独特优势\n\n"
583
+ "输出扰动在推理阶段对模型输出的Loss值注入高斯噪声,"
584
+ "核心优势在于完全不改变模型参数,因此对模型效用无任何影响。实验中测试了三组噪声强度:\n\n"
585
+ "| 噪声强度 | AUC | AUC降幅 | 准确率 |\n"
586
+ "|----------|-----|---------|--------|\n"
587
+ "| s=0.01 | " + f"{op001_auc:.4f}" + " | " + f"{bl_auc-op001_auc:.4f}" + " | " + f"{bl_acc:.1f}" + "% (不变) |\n"
588
+ "| s=0.015 | " + f"{op0015_auc:.4f}" + " | " + f"{bl_auc-op0015_auc:.4f}" + " | " + f"{bl_acc:.1f}" + "% (不变) |\n"
589
+ "| s=0.02 | " + f"{op002_auc:.4f}" + " | " + f"{bl_auc-op002_auc:.4f}" + " | " + f"{bl_acc:.1f}" + "% (不变) |\n\n"
590
+ "随着噪声强度增大,AUC呈单调下降趋势,表明更强的扰动更有效地模糊了成员与非成员的统计差异。"
591
+ "s=0.02时AUC降至 " + f"{op002_auc:.4f}" + ",接近标签平滑 e=0.2 的防御效果,"
592
+ "但完全不需要重新训练模型,适合已部署系统的后期隐私加固。\n\n"
593
  "---\n\n"
594
  "### 四、隐私-效用权衡的定量分析\n\n"
595
+ "| 策略 | AUC | 准确率 | AUC变化 | 效用变化 | 特点 |\n"
596
+ "|------|-----|--------|--------|---------|------|\n"
597
+ "| 基线 (无防御) | " + f"{bl_auc:.4f}" + " | " + f"{bl_acc:.1f}" + "% | -- | -- | 风险最高 |\n"
598
+ "| 标签平滑 e=0.02 | " + f"{s002_auc:.4f}" + " | " + f"{s002_acc:.1f}" + "% | " + f"{s002_auc-bl_auc:+.4f}" + " | " + f"{s002_acc-bl_acc:+.1f}" + "pp | 隐私与效用双优 |\n"
599
+ "| 标签平滑 e=0.2 | " + f"{s02_auc:.4f}" + " | " + f"{s02_acc:.1f}" + "% | " + f"{s02_auc-bl_auc:+.4f}" + " | " + f"{s02_acc-bl_acc:+.1f}" + "pp | 强力防御 |\n"
600
+ "| 输出扰动 s=0.01 | " + f"{op001_auc:.4f}" + " | " + f"{bl_acc:.1f}" + "% | " + f"{op001_auc-bl_auc:+.4f}" + " | 0 | 温和扰动 |\n"
601
+ "| 输出扰动 s=0.015 | " + f"{op0015_auc:.4f}" + " | " + f"{bl_acc:.1f}" + "% | " + f"{op0015_auc-bl_auc:+.4f}" + " | 0 | 适中扰动 |\n"
602
+ "| 输出扰动 s=0.02 | " + f"{op002_auc:.4f}" + " | " + f"{bl_acc:.1f}" + "% | " + f"{op002_auc-bl_auc:+.4f}" + " | 0 | 零效用损失有效防御 |\n\n"
603
+ "综合上述实验结果,两类防御策略机制上具有互补性: "
604
+ "标签平滑从训练阶段降低模型的记忆程度,输出扰动从推理阶段遮蔽可被利用的统计信号。"
605
+ "在实际部署中,可根据场景需求灵活选择或组合使用。\n")
606
 
607
  gr.Markdown(
608
  "---\n\n<center>\n\n"