xiaohy commited on
Commit
2f9692c
·
verified ·
1 Parent(s): eb028b0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +196 -192
app.py CHANGED
@@ -48,17 +48,14 @@ bl_m_mean = mia_results.get('baseline', {}).get('member_loss_mean', 0.19)
48
  bl_nm_mean = mia_results.get('baseline', {}).get('non_member_loss_mean', 0.23)
49
  bl_m_std = mia_results.get('baseline', {}).get('member_loss_std', 0.03)
50
  bl_nm_std = mia_results.get('baseline', {}).get('non_member_loss_std', 0.03)
51
-
52
  s002_m_mean = mia_results.get('smooth_0.02', {}).get('member_loss_mean', 0.20)
53
  s002_nm_mean = mia_results.get('smooth_0.02', {}).get('non_member_loss_mean', 0.22)
54
  s002_m_std = mia_results.get('smooth_0.02', {}).get('member_loss_std', 0.03)
55
  s002_nm_std = mia_results.get('smooth_0.02', {}).get('non_member_loss_std', 0.03)
56
-
57
  s02_m_mean = mia_results.get('smooth_0.2', {}).get('member_loss_mean', 0.21)
58
  s02_nm_mean = mia_results.get('smooth_0.2', {}).get('non_member_loss_mean', 0.22)
59
  s02_m_std = mia_results.get('smooth_0.2', {}).get('member_loss_std', 0.03)
60
  s02_nm_std = mia_results.get('smooth_0.2', {}).get('non_member_loss_std', 0.03)
61
-
62
  model_name_str = config.get('model_name', 'Qwen/Qwen2.5-Math-1.5B-Instruct')
63
  data_size_str = str(config.get('data_size', 2000))
64
 
@@ -73,29 +70,8 @@ MODEL_PARAMS = {
73
  # Charts
74
  # ========================================
75
 
76
- def make_pie_chart():
77
- tc = {}
78
- for item in member_data + non_member_data:
79
- t = item.get('task_type', 'unknown')
80
- tc[t] = tc.get(t, 0) + 1
81
- nm = {'calculation': 'Calculation\n(Ji Chu Ji Suan)', 'word_problem': 'Word Problem\n(Ying Yong Ti)',
82
- 'concept': 'Concept Q&A\n(Gai Nian Wen Da)', 'error_correction': 'Error Correction\n(Cuo Ti Ding Zheng)'}
83
- labels = [nm.get(k, k) for k in tc]
84
- sizes = list(tc.values())
85
- colors = ['#5B8FF9', '#5AD8A6', '#F6BD16', '#E86452']
86
- fig, ax = plt.subplots(figsize=(6.5, 5.5))
87
- wedges, texts, autotexts = ax.pie(
88
- sizes, labels=labels, autopct='%1.1f%%', colors=colors[:len(labels)],
89
- startangle=90, textprops={'fontsize': 9},
90
- wedgeprops={'edgecolor': 'white', 'linewidth': 2})
91
- for t in autotexts:
92
- t.set_fontsize(10)
93
- t.set_fontweight('bold')
94
- plt.tight_layout()
95
- return fig
96
-
97
-
98
  def make_loss_distribution():
 
99
  items = []
100
  for k, t in [('baseline', 'Baseline'), ('smooth_0.02', 'LS(e=0.02)'), ('smooth_0.2', 'LS(e=0.2)')]:
101
  if k in full_results:
@@ -106,26 +82,59 @@ def make_loss_distribution():
106
  fig, ax = plt.subplots()
107
  ax.text(0.5, 0.5, 'No data', ha='center')
108
  return fig
109
- fig, axes = plt.subplots(1, n, figsize=(4.8 * n, 4.2))
110
  if n == 1:
111
  axes = [axes]
112
  for ax, (k, title) in zip(axes, items):
113
  m = full_results[k]['member_losses']
114
  nm_l = full_results[k]['non_member_losses']
115
- lo = min(min(m), min(nm_l))
116
- hi = max(max(m), max(nm_l))
117
- bins = np.linspace(lo, hi, 30)
118
  ax.hist(m, bins=bins, alpha=0.55, color='#5B8FF9', label='Member', density=True)
119
  ax.hist(nm_l, bins=bins, alpha=0.55, color='#E86452', label='Non-Member', density=True)
120
- ax.set_title(title, fontsize=10, fontweight='bold')
121
- ax.set_xlabel('Loss', fontsize=8)
122
- ax.set_ylabel('Density', fontsize=8)
123
- ax.legend(fontsize=7, loc='upper right')
124
- ax.tick_params(labelsize=7)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
  ax.grid(True, linestyle='--', alpha=0.3)
126
  ax.spines['top'].set_visible(False)
127
  ax.spines['right'].set_visible(False)
128
- plt.tight_layout(pad=1.5)
 
129
  return fig
130
 
131
 
@@ -140,50 +149,50 @@ def make_auc_bar():
140
  ('perturbation_0.02', 'OP(s=0.02)', '#1A7F5A')]:
141
  if k in perturb_results:
142
  methods.append(name); aucs.append(perturb_results[k]['auc']); colors.append(c)
143
- fig, ax = plt.subplots(figsize=(9, 5))
144
  bars = ax.bar(methods, aucs, color=colors, width=0.5, edgecolor='white', linewidth=1.5)
145
  for bar, a in zip(bars, aucs):
146
  ax.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.002,
147
- f'{a:.4f}', ha='center', va='bottom', fontsize=9, fontweight='bold')
148
  ax.axhline(y=0.5, color='#E86452', linestyle='--', linewidth=1.5, alpha=0.6, label='Random Guess (0.5)')
149
- ax.set_ylabel('MIA AUC', fontsize=11)
150
  ax.set_ylim(0.48, max(aucs) + 0.035 if aucs else 0.7)
151
- ax.legend(fontsize=9)
152
  ax.grid(axis='y', linestyle='--', alpha=0.3)
153
  ax.spines['top'].set_visible(False)
154
  ax.spines['right'].set_visible(False)
155
- plt.xticks(rotation=8, fontsize=9)
156
  plt.tight_layout()
157
  return fig
158
 
159
 
160
  def make_tradeoff():
161
- fig, ax = plt.subplots(figsize=(8, 6))
162
  pts = []
163
- for k, name, mk, c, sz in [('baseline', 'Baseline', 'o', '#8C8C8C', 200),
164
- ('smooth_0.02', 'LS(e=0.02)', 's', '#5B8FF9', 180),
165
- ('smooth_0.2', 'LS(e=0.2)', 's', '#3D76DD', 180)]:
166
  if k in mia_results and k in utility_results:
167
  pts.append({'n': name, 'a': mia_results[k]['auc'], 'c': utility_results[k]['accuracy'],
168
  'm': mk, 'co': c, 's': sz})
169
  ba = utility_results.get('baseline', {}).get('accuracy', 0.633)
170
- for k, name, mk, c, sz in [('perturbation_0.01', 'OP(s=0.01)', '^', '#5AD8A6', 190),
171
- ('perturbation_0.015', 'OP(s=0.015)', 'D', '#2EAD78', 150),
172
- ('perturbation_0.02', 'OP(s=0.02)', '^', '#1A7F5A', 190)]:
173
  if k in perturb_results:
174
  pts.append({'n': name, 'a': perturb_results[k]['auc'], 'c': ba, 'm': mk, 'co': c, 's': sz})
175
  for p in pts:
176
  ax.scatter(p['c'], p['a'], label=p['n'], marker=p['m'], color=p['co'],
177
  s=p['s'], edgecolors='white', linewidth=2, zorder=5)
178
  ax.axhline(y=0.5, color='#BFBFBF', linestyle='--', alpha=0.8, label='Random Guess')
179
- ax.set_xlabel('Accuracy', fontsize=11, fontweight='bold')
180
- ax.set_ylabel('MIA AUC (Privacy Risk)', fontsize=11, fontweight='bold')
181
- ax.set_title('Privacy-Utility Trade-off', fontsize=13, fontweight='bold')
182
  aa = [p['c'] for p in pts]; ab = [p['a'] for p in pts]
183
  if aa and ab:
184
  ax.set_xlim(min(aa)-0.03, max(aa)+0.05)
185
  ax.set_ylim(min(min(ab), 0.5)-0.02, max(ab)+0.025)
186
- ax.legend(loc='upper right', fontsize=8, fancybox=True)
187
  ax.grid(True, alpha=0.2)
188
  ax.spines['top'].set_visible(False)
189
  ax.spines['right'].set_visible(False)
@@ -203,54 +212,54 @@ def make_accuracy_bar():
203
  ('perturbation_0.02', 'OP(s=0.02)', '#1A7F5A')]:
204
  if k in perturb_results:
205
  names.append(name); accs.append(bp); colors.append(c)
206
- fig, ax = plt.subplots(figsize=(9, 5))
207
  bars = ax.bar(names, accs, color=colors, width=0.5, edgecolor='white', linewidth=1.5)
208
  for bar, acc in zip(bars, accs):
209
  ax.text(bar.get_x()+bar.get_width()/2, bar.get_height()+0.5,
210
- f'{acc:.1f}%', ha='center', va='bottom', fontsize=9, fontweight='bold')
211
- ax.set_ylabel('Accuracy (%)', fontsize=11)
212
  ax.set_ylim(0, 100)
213
  ax.grid(axis='y', alpha=0.3)
214
  ax.spines['top'].set_visible(False)
215
  ax.spines['right'].set_visible(False)
216
- plt.xticks(rotation=8, fontsize=9)
217
  plt.tight_layout()
218
  return fig
219
 
220
 
221
  def make_loss_gauge(loss_val, m_mean, nm_mean, threshold, m_std, nm_std):
222
- fig, ax = plt.subplots(figsize=(8, 2.8))
223
  x_min = min(m_mean - 3*m_std, loss_val - 0.01)
224
  x_max = max(nm_mean + 3*nm_std, loss_val + 0.01)
225
  ax.axvspan(x_min, threshold, alpha=0.12, color='#5B8FF9')
226
  ax.axvspan(threshold, x_max, alpha=0.12, color='#E86452')
227
  ax.axvline(x=threshold, color='#434343', linewidth=2, linestyle='-', zorder=3)
228
- ax.text(threshold, 1.12, 'Threshold', ha='center', va='bottom', fontsize=9,
229
  fontweight='bold', color='#434343', transform=ax.get_xaxis_transform())
230
  ax.axvline(x=m_mean, color='#5B8FF9', linewidth=1.2, linestyle='--', alpha=0.6)
231
- ax.text(m_mean, -0.28, f'Member\n({m_mean:.4f})', ha='center', va='top',
232
- fontsize=7.5, color='#5B8FF9', transform=ax.get_xaxis_transform())
233
  ax.axvline(x=nm_mean, color='#E86452', linewidth=1.2, linestyle='--', alpha=0.6)
234
- ax.text(nm_mean, -0.28, f'Non-Member\n({nm_mean:.4f})', ha='center', va='top',
235
- fontsize=7.5, color='#E86452', transform=ax.get_xaxis_transform())
236
  in_member = loss_val < threshold
237
  mc = '#5B8FF9' if in_member else '#E86452'
238
  ax.plot(loss_val, 0.5, marker='v', markersize=16, color=mc, zorder=5,
239
  transform=ax.get_xaxis_transform())
240
- ax.text(loss_val, 0.78, f'Loss={loss_val:.4f}', ha='center', va='bottom', fontsize=10,
241
  fontweight='bold', color=mc, transform=ax.get_xaxis_transform(),
242
  bbox=dict(boxstyle='round,pad=0.3', facecolor='white', edgecolor=mc, alpha=0.95))
243
  mc_x = (x_min + threshold) / 2
244
  nmc_x = (threshold + x_max) / 2
245
- ax.text(mc_x, 0.5, 'Member Zone', ha='center', va='center', fontsize=10,
246
  color='#5B8FF9', fontweight='bold', alpha=0.5, transform=ax.get_xaxis_transform())
247
- ax.text(nmc_x, 0.5, 'Non-Member Zone', ha='center', va='center', fontsize=10,
248
  color='#E86452', fontweight='bold', alpha=0.5, transform=ax.get_xaxis_transform())
249
  ax.set_xlim(x_min, x_max)
250
  ax.set_yticks([])
251
  for sp in ['top', 'right', 'left']:
252
  ax.spines[sp].set_visible(False)
253
- ax.set_xlabel('Loss Value', fontsize=9)
254
  plt.tight_layout()
255
  return fig
256
 
@@ -279,35 +288,61 @@ MODEL_CHOICE_MAP = {
279
  "基线模型 (Baseline)": "baseline",
280
  "标签平滑模型 (e=0.02)": "smooth_0.02",
281
  "标签平滑模型 (e=0.2)": "smooth_0.2",
 
 
 
282
  }
283
 
284
 
285
  def run_mia_demo(sample_index, data_type, model_choice):
286
- is_member = (data_type == "成员数据训练集)")
287
  data = member_data if is_member else non_member_data
288
  idx = min(int(sample_index), len(data) - 1)
289
  sample = data[idx]
290
 
291
  model_key = MODEL_CHOICE_MAP.get(model_choice, "baseline")
292
- params = MODEL_PARAMS.get(model_key, MODEL_PARAMS["baseline"])
293
 
294
- fr = full_results.get(model_key, full_results.get('baseline', {}))
295
- if is_member and idx < len(fr.get('member_losses', [])):
296
- loss = fr['member_losses'][idx]
297
- elif not is_member and idx < len(fr.get('non_member_losses', [])):
298
- loss = fr['non_member_losses'][idx]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
299
  else:
300
- loss = float(np.random.normal(params['m_mean'] if is_member else params['nm_mean'], 0.02))
 
 
 
 
 
 
 
 
 
 
 
 
 
301
 
302
- m_mean = params['m_mean']
303
- nm_mean = params['nm_mean']
304
- m_std = params['m_std']
305
- nm_std = params['nm_std']
306
  threshold = (m_mean + nm_mean) / 2.0
307
  pred_member = (loss < threshold)
308
  attack_correct = (pred_member == is_member)
309
 
310
- gauge_fig = make_loss_gauge(loss, m_mean, nm_mean, threshold, m_std, nm_std)
311
 
312
  pred_label = "训练成员" if pred_member else "非训练成员"
313
  pred_color = "🔴" if pred_member else "🟢"
@@ -324,10 +359,9 @@ def run_mia_demo(sample_index, data_type, model_choice):
324
  verdict = "❌ **攻击失误**"
325
  verdict_detail = "攻击者的判定与真实身份不符。"
326
 
327
- model_auc = mia_results.get(model_key, {}).get('auc', 0)
328
  result_md = (
329
  verdict + "\n\n" + verdict_detail + "\n\n"
330
- "**当前攻击模型**: " + params['label'] + " (AUC=" + f"{model_auc:.4f}" + ")\n\n"
331
  "| | 攻击者计算得出 | 系统真实身份 |\n"
332
  "|---|---|---|\n"
333
  "| 判定 | " + pred_color + " " + pred_label + " | " + actual_color + " " + actual_label + " |\n"
@@ -349,28 +383,23 @@ body { background-color: #f0f4f8 !important; }
349
  }
350
  .tab-nav { border-bottom: 2px solid #e1e8f0 !important; margin-bottom: 20px !important; }
351
  .tab-nav button {
352
- font-size: 15px !important; padding: 14px 24px !important; font-weight: 500 !important;
353
  color: #64748b !important; border-radius: 8px 8px 0 0 !important;
354
  transition: all 0.3s ease !important; background: transparent !important; border: none !important;
355
  }
356
- .tab-nav button:hover { color: #3b82f6 !important; background: rgba(59,130,246,0.05) !important; }
357
  .tab-nav button.selected { font-weight: 700 !important; color: #2563eb !important; border-bottom: 3px solid #2563eb !important; }
358
- .tabitem {
359
- background: #fff !important; border-radius: 12px !important;
360
- box-shadow: 0 4px 20px rgba(0,0,0,0.04) !important; padding: 30px !important; border: 1px solid #e2e8f0 !important;
361
- }
362
- .prose h1 { font-size: 2.2rem !important; color: #0f172a !important; font-weight: 800 !important; text-align: center !important; }
363
- .prose h2 { font-size: 1.4rem !important; color: #1e293b !important; margin-top: 1.5em !important; padding-bottom: 0.4em !important; border-bottom: 2px solid #f1f5f9 !important; font-weight: 700 !important; }
364
- .prose h3 { font-size: 1.15rem !important; color: #334155 !important; font-weight: 600 !important; }
365
- .prose table { width: 100% !important; border-collapse: separate !important; border-spacing: 0 !important; margin: 1.5em 0 !important; border-radius: 10px !important; overflow: hidden !important; box-shadow: 0 0 0 1px #e2e8f0, 0 4px 6px -1px rgba(0,0,0,0.05) !important; font-size: 0.92rem !important; }
366
- .prose th { background: #f8fafc !important; color: #475569 !important; font-weight: 600 !important; font-size: 0.85rem !important; letter-spacing: 0.05em !important; padding: 12px 14px !important; border-bottom: 2px solid #e2e8f0 !important; }
367
  .prose tr:nth-child(even) td { background: #f8fafc !important; }
368
- .prose td { padding: 10px 14px !important; color: #334155 !important; border-bottom: 1px solid #e2e8f0 !important; }
369
  .prose tr:last-child td { border-bottom: none !important; }
370
- .prose tr:hover td { background-color: #f1f5f9 !important; }
371
- .prose blockquote { border-left: 4px solid #3b82f6 !important; background: linear-gradient(to right,#eff6ff,#fff) !important; padding: 14px 18px !important; border-radius: 0 8px 8px 0 !important; font-size: 0.93rem !important; color: #1e40af !important; margin: 1.5em 0 !important; }
372
  button.primary { background: linear-gradient(135deg,#3b82f6 0%,#2563eb 100%) !important; border: none !important; box-shadow: 0 4px 12px rgba(37,99,235,0.25) !important; font-weight: 600 !important; }
373
- button.primary:hover { transform: translateY(-2px) !important; box-shadow: 0 6px 16px rgba(37,99,235,0.35) !important; }
374
  footer { display: none !important; }
375
  """
376
 
@@ -385,9 +414,8 @@ with gr.Blocks(title="教育大模型隐私攻防", theme=gr.themes.Soft(
385
  with gr.Tab("项目概览"):
386
  gr.Markdown(
387
  "## 研究背景\n\n"
388
- "大语言模型在教育领域的应用日益广泛,模型训练不可避免地接触到学生敏感数据。"
389
- "**成员推理攻击 (Membership Inference Attack, MIA)** 能判断某条数据是否参与了模型训练,"
390
- "从而推断学生的隐私信息,构成切实的隐私威胁。\n\n"
391
  "---\n\n"
392
  "## 实验设计\n\n"
393
  "| 阶段 | 内容 | 方法 |\n"
@@ -395,10 +423,9 @@ with gr.Blocks(title="教育大模型隐私攻防", theme=gr.themes.Soft(
395
  "| 1. 数据准备 | 2000条小学数学辅导对话 | 模板化生成,含姓名/学号/成绩等隐私字段 |\n"
396
  "| 2. 基线模型训练 | Qwen2.5-Math-1.5B + LoRA | 标准微调,无任何防御措施 |\n"
397
  "| 3. 标签平滑模型训练 | 两组不同平滑系数 | e=0.02(温和) 与 e=0.2(强力) 分别训练 |\n"
398
- "| 4. MIA攻击测试 | 对三个模型分别发起攻击 | 基于Loss阈值成员推理,AUC评估 |\n"
399
- "| 5. 输出扰动测试 | 在基线模型上添加噪声 | 高斯噪声 s=0.01/0.015/0.02 三组 |\n"
400
- "| 6. 效用评估 | 300道数学测试题 | 三个模型分别测试准确率 |\n"
401
- "| 7. 综合分析 | 隐私-效用权衡 | 散点图 + 定量对比 |\n\n"
402
  "---\n\n"
403
  "## 实验配置\n\n"
404
  "| 项目 | 值 |\n"
@@ -407,35 +434,42 @@ with gr.Blocks(title="教育大模型隐私攻防", theme=gr.themes.Soft(
407
  "| 微调方法 | LoRA (r=8, alpha=16, target: q/k/v/o_proj) |\n"
408
  "| 训练轮数 | 10 epochs |\n"
409
  "| 数据总量 | " + data_size_str + " 条 (成员1000 + 非成员1000) |\n"
410
- "| 训练模型数 | 3个 (基线 + 标签平滑x2) |\n")
 
411
 
412
  with gr.Tab("数据展示"):
413
  gr.Markdown("## 数据集概况\n\n"
414
- "成员数据1000条(训练集)与非成员数据1000条(对照组),每条均包含学生隐私字段。\n")
 
 
 
 
 
 
 
415
  with gr.Row():
416
- with gr.Column(scale=1):
417
- gr.Plot(value=make_pie_chart())
418
  with gr.Column(scale=1):
419
  gr.Markdown("**选择靶向数据池**")
420
  data_sel = gr.Radio(["成员数据(训练集)", "非成员数据(测试集)"],
421
  value="成员数据(训练集)", label="")
422
  sample_btn = gr.Button("随机提取", variant="primary")
423
  sample_info = gr.Markdown()
424
- gr.Markdown("---\n\n**原始对话内容**")
425
- with gr.Row():
426
- sample_q = gr.Textbox(label="学生提问 (Prompt)", lines=5, interactive=False)
427
- sample_a = gr.Textbox(label="模型回答 (Ground Truth)", lines=5, interactive=False)
428
  sample_btn.click(show_random_sample, [data_sel], [sample_info, sample_q, sample_a])
429
 
430
  with gr.Tab("MIA攻击演示"):
431
  gr.Markdown(
432
  "## 发起成员推理攻击\n\n"
433
- "选择目标模型和数据来源,系统将计算该样本的Loss值并实施成员身份判定。\n")
434
  with gr.Row():
435
  with gr.Column(scale=1):
436
  atk_model = gr.Radio(
437
- ["基线模型 (Baseline)", "标签平滑模型 (e=0.02)", "标签平滑模型 (e=0.2)"],
438
- value="基线模型 (Baseline)", label="选择攻击目标模型")
 
439
  atk_type = gr.Radio(["成员数据(训练集)", "非成员数据(测试集)"],
440
  value="成员数据(训练集)", label="模拟真实数据来源")
441
  atk_idx = gr.Slider(0, 999, step=1, value=0, label="样本游标 ID (0-999)")
@@ -450,19 +484,16 @@ with gr.Blocks(title="教育大模型隐私攻防", theme=gr.themes.Soft(
450
  with gr.Tab("防御对比"):
451
  gr.Markdown(
452
  "## 防御策略效果对比\n\n"
453
- "本研究测试了两类防御策略,以下基于实验数据给出对比分析。\n\n"
454
  "| 策略 | 类型 | 原理 | 实验验证的优势 | 实验观察到的局限 |\n"
455
  "|------|------|------|---------------|----------------|\n"
456
- "| 标签平滑 | 训练期 | 软化训练标签,抑制对训练数据的过度记忆 | e=0.02时AUC降至" + f"{s002_auc:.4f}" + ",准确率提升至" + f"{s002_acc:.1f}" + "% | 需重新训练模型;e过大时可能影响效用 |\n"
457
- "| 输出扰动 | 推理期 | 对模型输出Loss添加高斯噪声,模糊统计差异 | s=0.02时AUC降至" + f"{op002_auc:.4f}" + ",准确率完全不变 | 仅遮蔽Loss层面的统计信号,不改变模型本身的记忆特性 |\n")
458
-
459
- with gr.Row():
460
- with gr.Column():
461
- gr.Markdown("### AUC对比(全部策略)")
462
- gr.Plot(value=make_auc_bar())
463
- with gr.Column():
464
- gr.Markdown("### Loss分布对比(三个模型)")
465
- gr.Plot(value=make_loss_distribution())
466
 
467
  tbl = (
468
  "### 完整实验结果\n\n"
@@ -487,10 +518,9 @@ with gr.Blocks(title="教育大模型隐私攻防", theme=gr.themes.Soft(
487
  gr.Markdown(
488
  "## 一、标签平滑 (Label Smoothing)\n\n"
489
  "**类型**: 训��期防御\n\n"
490
- "将训练标签从硬标签 (one-hot) 转换为软标签,降低模型对训练样本的过度拟合程度,"
491
- "从而缩小成员与非成员之间的Loss分布差异。\n\n"
492
- "$$y_{smooth} = (1 - \\varepsilon) \\cdot y_{onehot} + \\frac{\\varepsilon}{V}$$\n\n"
493
- "其中 $\\varepsilon$ 为平滑系数,$V$ 为词汇表大小。\n\n"
494
  "| 参数 | AUC | 准确率 | 分析 |\n"
495
  "|------|-----|--------|------|\n"
496
  "| 基线 (e=0) | " + f"{bl_auc:.4f}" + " | " + f"{bl_acc:.1f}" + "% | 无防御,攻击风险较高 |\n"
@@ -499,9 +529,9 @@ with gr.Blocks(title="教育大模型隐私攻防", theme=gr.themes.Soft(
499
  "---\n\n"
500
  "## 二、输出扰动 (Output Perturbation)\n\n"
501
  "**类型**: 推理期防御\n\n"
502
- "在推理阶段对模型返回的Loss值注入高斯噪声,使攻击者难以从Loss的微小差异中区分成员与非成员。\n\n"
503
- "$$\\mathcal{L}_{perturbed} = \\mathcal{L}_{original} + \\mathcal{N}(0, \\sigma^2)$$\n\n"
504
- "其中 $\\sigma$ 为噪声标准差,控制扰动强度。\n\n"
505
  "| 参数 | AUC | AUC降幅 | 准确率 |\n"
506
  "|------|-----|---------|--------|\n"
507
  "| 基线 (s=0) | " + f"{bl_auc:.4f}" + " | -- | " + f"{bl_acc:.1f}" + "% |\n"
@@ -515,14 +545,11 @@ with gr.Blocks(title="教育大模型隐私攻防", theme=gr.themes.Soft(
515
  "| 作用阶段 | 训练期 | 推理期 |\n"
516
  "| 是否需要重训 | 是 | 否 |\n"
517
  "| 对效用的影响 | 取决于平滑系数 | 无影响 |\n"
518
- "| 防御原理 | 抑制过拟合,降低记忆 | 遮蔽Loss层面统计信号 |\n"
519
- "| 部署难度 | 需训练阶段介入 | 推理阶段即插即用 |\n"
520
- "| 可叠加使用 | 是 | 是 |\n")
521
 
522
  with gr.Tab("效用评估"):
523
- gr.Markdown(
524
- "## 效用评估\n\n"
525
- "> 测试集: 300道数学题,覆盖基础计算、应用题、概念问答三类任务。\n")
526
  with gr.Row():
527
  with gr.Column():
528
  gr.Markdown("### 准确率对比")
@@ -530,83 +557,60 @@ with gr.Blocks(title="教育大模型隐私攻防", theme=gr.themes.Soft(
530
  with gr.Column():
531
  gr.Markdown("### 隐私-效用权衡")
532
  gr.Plot(value=make_tradeoff())
533
-
534
  gr.Markdown(
535
  "### 效用分析\n\n"
536
  "| 策略 | 准确率 | AUC | 效用变化 | 分析 |\n"
537
  "|------|--------|-----|---------|------|\n"
538
- "| 基线 | " + f"{bl_acc:.1f}" + "% | " + f"{bl_auc:.4f}" + " | -- | 效用基准,隐私风险最高 |\n"
539
- "| LS(e=0.02) | " + f"{s002_acc:.1f}" + "% | " + f"{s002_auc:.4f}" + " | " + f"{s002_acc-bl_acc:+.1f}" + "pp | 适度正则化提升泛化能力,准确率反而上升 |\n"
540
- "| LS(e=0.2) | " + f"{s02_acc:.1f}" + "% | " + f"{s02_auc:.4f}" + " | " + f"{s02_acc-bl_acc:+.1f}" + "pp | 强力平滑对效用有一定影响,但可接受范围 |\n"
541
  "| OP(s=0.01) | " + f"{bl_acc:.1f}" + "% | " + f"{op001_auc:.4f}" + " | 0 | 零效用损失 |\n"
542
  "| OP(s=0.015) | " + f"{bl_acc:.1f}" + "% | " + f"{op0015_auc:.4f}" + " | 0 | 零效用损失 |\n"
543
  "| OP(s=0.02) | " + f"{bl_acc:.1f}" + "% | " + f"{op002_auc:.4f}" + " | 0 | 零效用损失 |\n\n"
544
- "> **关键发现**: 标签平滑 e=0.02 不仅降低了隐私风险,还因正则化效应提升了模型的泛化能力。"
545
- "输出扰动完全不影响效用的前提下实现了有效防御。"
546
- "两类策略在效用维度上呈现互补特性:前者可能提升效用,后者保证效用不变。\n")
547
 
548
  with gr.Tab("实验结果可视化"):
549
  gr.Markdown("## 实验核心图表")
550
- for fn, cap in [("fig1_loss_distribution_comparison.png", "图1: 成员与非成员Loss分布对比 (Baseline vs Label Smoothing)"),
551
- ("fig2_privacy_utility_tradeoff_fixed.png", "图2: 隐私风险与模型效用权衡散点图"),
552
  ("fig3_defense_comparison_bar.png", "图3: 各防御策略MIA攻击AUC对比")]:
553
  p = os.path.join(BASE_DIR, "figures", fn)
554
  if os.path.exists(p):
555
  gr.Markdown("### " + cap)
556
- gr.Image(value=p, show_label=False, height=400)
557
  gr.Markdown("---")
558
 
559
  with gr.Tab("研究结论"):
560
  gr.Markdown(
561
- "## 研究结论\n\n"
562
- "---\n\n"
563
  "### 一、教育大模型面临显著的成员推理攻击风险\n\n"
564
- "实验结果表明,经LoRA微调的Qwen2.5-Math-1.5B教育辅导模型在面对基于Loss的成员推理攻击时,"
565
- "AUC达到 **" + f"{bl_auc:.4f}" + "**,显著高随机猜测基准 (0.5)"
566
- "成员数据的平均Loss (" + f"{bl_m_mean:.4f}" + ") 明显低于非成员数据 (" + f"{bl_nm_mean:.4f}" + "),"
567
- "表明模型对训练数据产生了可被利用记忆应。"
568
- "在教育场景中训练数据包含学生姓名、学号、学业成绩等敏感信息,"
569
- "该攻击能力构成了切实的隐私威胁。\n\n"
570
- "---\n\n"
571
- "### 标签平滑作为训练期防御策略有效性与局限性\n\n"
572
- "标签平滑通过软化训练标签分布,抑制模型对训练样本的过度拟合,"
573
- "缩小成员与非成员之间的Loss分布差异。实验观察到:\n\n"
574
- "- **e=0.02** (温和平滑): AUC从 " + f"{bl_auc:.4f}" + " 降至 " + f"{s002_auc:.4f}"
575
- + ",准确率为 " + f"{s002_acc:.1f}" + "%。"
576
- "适度的正则化效应不仅降低了隐私风险,还提升了模型的泛化能力。\n"
577
- "- **e=0.2** (强力平滑): AUC进一步降至 " + f"{s02_auc:.4f}"
578
- + ",防御效果显著增强,准确率为 " + f"{s02_acc:.1f}" + "%。\n\n"
579
- "该结果表明平滑系数的选取需在隐私保护强度与模型效用之间进行权衡。"
580
- "从实验数据看,e=0.02在两者之间取得了较好的平衡点。\n\n"
581
- "---\n\n"
582
- "### 三、输出扰动作为推理期防御策略的独特优势\n\n"
583
- "输出扰动在推理阶段对模型输出的Loss值注入高斯噪声,"
584
- "核心优势在于完全不改变模型参数,因此对模型效用无任何影响。实验中测试了三组噪声强度:\n\n"
585
- "| 噪声强度 | AUC | AUC降幅 | 准确率 |\n"
586
- "|----------|-----|---------|--------|\n"
587
  "| s=0.01 | " + f"{op001_auc:.4f}" + " | " + f"{bl_auc-op001_auc:.4f}" + " | " + f"{bl_acc:.1f}" + "% (不变) |\n"
588
  "| s=0.015 | " + f"{op0015_auc:.4f}" + " | " + f"{bl_auc-op0015_auc:.4f}" + " | " + f"{bl_acc:.1f}" + "% (不变) |\n"
589
  "| s=0.02 | " + f"{op002_auc:.4f}" + " | " + f"{bl_auc-op002_auc:.4f}" + " | " + f"{bl_acc:.1f}" + "% (不变) |\n\n"
590
- "随着噪声强度增大,AUC呈单调下降趋势,表明更强的扰动更有地模糊了成员与非成员的计差异。"
591
- "s=0.02时AUC降至 " + f"{op002_auc:.4f}" + ",接近标签平滑 e=0.2 的防御效果,"
592
- "但完全不需要重新训练模型,适合已部署系统的后期隐私加固。\n\n"
593
- "---\n\n"
594
  "### 四、隐私-效用权衡的定量分析\n\n"
595
- "| 策略 | AUC | 准确率 | AUC变化 | 效用变化 | 特点 |\n"
596
- "|------|-----|--------|--------|---------|------|\n"
597
- "| 基线 (无防御) | " + f"{bl_auc:.4f}" + " | " + f"{bl_acc:.1f}" + "% | -- | -- | 风险最高 |\n"
598
- "| 标签平滑 e=0.02 | " + f"{s002_auc:.4f}" + " | " + f"{s002_acc:.1f}" + "% | " + f"{s002_auc-bl_auc:+.4f}" + " | " + f"{s002_acc-bl_acc:+.1f}" + "pp | 隐私与效用双优 |\n"
599
- "| 标签平滑 e=0.2 | " + f"{s02_auc:.4f}" + " | " + f"{s02_acc:.1f}" + "% | " + f"{s02_auc-bl_auc:+.4f}" + " | " + f"{s02_acc-bl_acc:+.1f}" + "pp | 强力防御 |\n"
600
- "| 输出扰动 s=0.01 | " + f"{op001_auc:.4f}" + " | " + f"{bl_acc:.1f}" + "% | " + f"{op001_auc-bl_auc:+.4f}" + " | 0 | 温和扰动 |\n"
601
- "| 输出扰动 s=0.015 | " + f"{op0015_auc:.4f}" + " | " + f"{bl_acc:.1f}" + "% | " + f"{op0015_auc-bl_auc:+.4f}" + " | 0 | 适中扰动 |\n"
602
- "| 输出扰动 s=0.02 | " + f"{op002_auc:.4f}" + " | " + f"{bl_acc:.1f}" + "% | " + f"{op002_auc-bl_auc:+.4f}" + " | 0 | 零效用损失的有效防御 |\n\n"
603
- "综合上述实验结果,两类防御策略在机制上具有互补性: "
604
- "标签平滑从训练阶段降低模型的记忆程度,输出扰动从推理阶段遮蔽被利用的统计信号。"
605
- "在实际部署中,可根据场景需求灵活选择或组合使用。\n")
606
-
607
- gr.Markdown(
608
- "---\n\n<center>\n\n"
609
- "教育大模型中的成员推理攻击及其防御思路研究\n\n"
610
- "</center>\n")
611
 
612
  demo.launch()
 
48
  bl_nm_mean = mia_results.get('baseline', {}).get('non_member_loss_mean', 0.23)
49
  bl_m_std = mia_results.get('baseline', {}).get('member_loss_std', 0.03)
50
  bl_nm_std = mia_results.get('baseline', {}).get('non_member_loss_std', 0.03)
 
51
  s002_m_mean = mia_results.get('smooth_0.02', {}).get('member_loss_mean', 0.20)
52
  s002_nm_mean = mia_results.get('smooth_0.02', {}).get('non_member_loss_mean', 0.22)
53
  s002_m_std = mia_results.get('smooth_0.02', {}).get('member_loss_std', 0.03)
54
  s002_nm_std = mia_results.get('smooth_0.02', {}).get('non_member_loss_std', 0.03)
 
55
  s02_m_mean = mia_results.get('smooth_0.2', {}).get('member_loss_mean', 0.21)
56
  s02_nm_mean = mia_results.get('smooth_0.2', {}).get('non_member_loss_mean', 0.22)
57
  s02_m_std = mia_results.get('smooth_0.2', {}).get('member_loss_std', 0.03)
58
  s02_nm_std = mia_results.get('smooth_0.2', {}).get('non_member_loss_std', 0.03)
 
59
  model_name_str = config.get('model_name', 'Qwen/Qwen2.5-Math-1.5B-Instruct')
60
  data_size_str = str(config.get('data_size', 2000))
61
 
 
70
  # Charts
71
  # ========================================
72
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  def make_loss_distribution():
74
+ """3 model Loss distributions - larger size"""
75
  items = []
76
  for k, t in [('baseline', 'Baseline'), ('smooth_0.02', 'LS(e=0.02)'), ('smooth_0.2', 'LS(e=0.2)')]:
77
  if k in full_results:
 
82
  fig, ax = plt.subplots()
83
  ax.text(0.5, 0.5, 'No data', ha='center')
84
  return fig
85
+ fig, axes = plt.subplots(1, n, figsize=(6 * n, 5.5))
86
  if n == 1:
87
  axes = [axes]
88
  for ax, (k, title) in zip(axes, items):
89
  m = full_results[k]['member_losses']
90
  nm_l = full_results[k]['non_member_losses']
91
+ bins = np.linspace(min(min(m), min(nm_l)), max(max(m), max(nm_l)), 30)
 
 
92
  ax.hist(m, bins=bins, alpha=0.55, color='#5B8FF9', label='Member', density=True)
93
  ax.hist(nm_l, bins=bins, alpha=0.55, color='#E86452', label='Non-Member', density=True)
94
+ ax.set_title(title, fontsize=13, fontweight='bold')
95
+ ax.set_xlabel('Loss', fontsize=11)
96
+ ax.set_ylabel('Density', fontsize=11)
97
+ ax.legend(fontsize=10, loc='upper right')
98
+ ax.tick_params(labelsize=10)
99
+ ax.grid(True, linestyle='--', alpha=0.3)
100
+ ax.spines['top'].set_visible(False)
101
+ ax.spines['right'].set_visible(False)
102
+ plt.suptitle('Model Loss Distribution: Member vs Non-Member', fontsize=15, fontweight='bold', y=1.02)
103
+ plt.tight_layout()
104
+ return fig
105
+
106
+
107
+ def make_perturb_loss_distribution():
108
+ """Output perturbation effect on baseline loss distribution"""
109
+ bl = full_results.get('baseline', {})
110
+ if not bl:
111
+ fig, ax = plt.subplots()
112
+ ax.text(0.5, 0.5, 'No data', ha='center')
113
+ return fig
114
+ m_losses = np.array(bl['member_losses'])
115
+ nm_losses = np.array(bl['non_member_losses'])
116
+ sigmas = [0.01, 0.015, 0.02]
117
+ fig, axes = plt.subplots(1, 3, figsize=(18, 5.5))
118
+ for ax, sigma in zip(axes, sigmas):
119
+ np.random.seed(42)
120
+ m_pert = m_losses + np.random.normal(0, sigma, len(m_losses))
121
+ nm_pert = nm_losses + np.random.normal(0, sigma, len(nm_losses))
122
+ all_vals = np.concatenate([m_pert, nm_pert])
123
+ bins = np.linspace(all_vals.min(), all_vals.max(), 30)
124
+ ax.hist(m_pert, bins=bins, alpha=0.55, color='#5B8FF9', label='Member (perturbed)', density=True)
125
+ ax.hist(nm_pert, bins=bins, alpha=0.55, color='#E86452', label='Non-Member (perturbed)', density=True)
126
+ pk = 'perturbation_' + str(sigma)
127
+ pauc = perturb_results.get(pk, {}).get('auc', 0)
128
+ ax.set_title(f'OP(s={sigma})\nAUC={pauc:.4f}', fontsize=13, fontweight='bold')
129
+ ax.set_xlabel('Loss', fontsize=11)
130
+ ax.set_ylabel('Density', fontsize=11)
131
+ ax.legend(fontsize=9, loc='upper right')
132
+ ax.tick_params(labelsize=10)
133
  ax.grid(True, linestyle='--', alpha=0.3)
134
  ax.spines['top'].set_visible(False)
135
  ax.spines['right'].set_visible(False)
136
+ plt.suptitle('Output Perturbation: Loss Distribution After Adding Noise', fontsize=15, fontweight='bold', y=1.02)
137
+ plt.tight_layout()
138
  return fig
139
 
140
 
 
149
  ('perturbation_0.02', 'OP(s=0.02)', '#1A7F5A')]:
150
  if k in perturb_results:
151
  methods.append(name); aucs.append(perturb_results[k]['auc']); colors.append(c)
152
+ fig, ax = plt.subplots(figsize=(12, 6))
153
  bars = ax.bar(methods, aucs, color=colors, width=0.5, edgecolor='white', linewidth=1.5)
154
  for bar, a in zip(bars, aucs):
155
  ax.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.002,
156
+ f'{a:.4f}', ha='center', va='bottom', fontsize=11, fontweight='bold')
157
  ax.axhline(y=0.5, color='#E86452', linestyle='--', linewidth=1.5, alpha=0.6, label='Random Guess (0.5)')
158
+ ax.set_ylabel('MIA AUC', fontsize=12)
159
  ax.set_ylim(0.48, max(aucs) + 0.035 if aucs else 0.7)
160
+ ax.legend(fontsize=10)
161
  ax.grid(axis='y', linestyle='--', alpha=0.3)
162
  ax.spines['top'].set_visible(False)
163
  ax.spines['right'].set_visible(False)
164
+ plt.xticks(fontsize=11)
165
  plt.tight_layout()
166
  return fig
167
 
168
 
169
  def make_tradeoff():
170
+ fig, ax = plt.subplots(figsize=(10, 7))
171
  pts = []
172
+ for k, name, mk, c, sz in [('baseline', 'Baseline', 'o', '#8C8C8C', 220),
173
+ ('smooth_0.02', 'LS(e=0.02)', 's', '#5B8FF9', 200),
174
+ ('smooth_0.2', 'LS(e=0.2)', 's', '#3D76DD', 200)]:
175
  if k in mia_results and k in utility_results:
176
  pts.append({'n': name, 'a': mia_results[k]['auc'], 'c': utility_results[k]['accuracy'],
177
  'm': mk, 'co': c, 's': sz})
178
  ba = utility_results.get('baseline', {}).get('accuracy', 0.633)
179
+ for k, name, mk, c, sz in [('perturbation_0.01', 'OP(s=0.01)', '^', '#5AD8A6', 200),
180
+ ('perturbation_0.015', 'OP(s=0.015)', 'D', '#2EAD78', 160),
181
+ ('perturbation_0.02', 'OP(s=0.02)', '^', '#1A7F5A', 200)]:
182
  if k in perturb_results:
183
  pts.append({'n': name, 'a': perturb_results[k]['auc'], 'c': ba, 'm': mk, 'co': c, 's': sz})
184
  for p in pts:
185
  ax.scatter(p['c'], p['a'], label=p['n'], marker=p['m'], color=p['co'],
186
  s=p['s'], edgecolors='white', linewidth=2, zorder=5)
187
  ax.axhline(y=0.5, color='#BFBFBF', linestyle='--', alpha=0.8, label='Random Guess')
188
+ ax.set_xlabel('Accuracy', fontsize=12, fontweight='bold')
189
+ ax.set_ylabel('MIA AUC (Privacy Risk)', fontsize=12, fontweight='bold')
190
+ ax.set_title('Privacy-Utility Trade-off', fontsize=14, fontweight='bold')
191
  aa = [p['c'] for p in pts]; ab = [p['a'] for p in pts]
192
  if aa and ab:
193
  ax.set_xlim(min(aa)-0.03, max(aa)+0.05)
194
  ax.set_ylim(min(min(ab), 0.5)-0.02, max(ab)+0.025)
195
+ ax.legend(loc='upper right', fontsize=9, fancybox=True)
196
  ax.grid(True, alpha=0.2)
197
  ax.spines['top'].set_visible(False)
198
  ax.spines['right'].set_visible(False)
 
212
  ('perturbation_0.02', 'OP(s=0.02)', '#1A7F5A')]:
213
  if k in perturb_results:
214
  names.append(name); accs.append(bp); colors.append(c)
215
+ fig, ax = plt.subplots(figsize=(12, 6))
216
  bars = ax.bar(names, accs, color=colors, width=0.5, edgecolor='white', linewidth=1.5)
217
  for bar, acc in zip(bars, accs):
218
  ax.text(bar.get_x()+bar.get_width()/2, bar.get_height()+0.5,
219
+ f'{acc:.1f}%', ha='center', va='bottom', fontsize=11, fontweight='bold')
220
+ ax.set_ylabel('Accuracy (%)', fontsize=12)
221
  ax.set_ylim(0, 100)
222
  ax.grid(axis='y', alpha=0.3)
223
  ax.spines['top'].set_visible(False)
224
  ax.spines['right'].set_visible(False)
225
+ plt.xticks(fontsize=11)
226
  plt.tight_layout()
227
  return fig
228
 
229
 
230
def make_loss_gauge(loss_val, m_mean, nm_mean, threshold, m_std, nm_std):
    """Render a 1-D 'gauge' showing where a sample's loss falls.

    The x-axis is the loss value; the region left of `threshold` is shaded
    as the Member zone, the region to the right as the Non-Member zone.
    A marker shows `loss_val`, with dashed reference lines at the member
    and non-member mean losses.

    Args:
        loss_val: loss of the sample under attack.
        m_mean / nm_mean: mean loss of members / non-members.
        threshold: decision boundary (loss below => predicted member).
        m_std / nm_std: loss standard deviations, used to pad the x-range.

    Returns:
        The matplotlib Figure.
    """
    member_color, non_member_color, neutral_color = '#5B8FF9', '#E86452', '#434343'
    fig, ax = plt.subplots(figsize=(9, 3))

    # X-range: cover +/-3 sigma around both means, always including loss_val.
    lo = min(m_mean - 3 * m_std, loss_val - 0.01)
    hi = max(nm_mean + 3 * nm_std, loss_val + 0.01)
    xform = ax.get_xaxis_transform()

    # Shaded decision zones split at the threshold.
    ax.axvspan(lo, threshold, alpha=0.12, color=member_color)
    ax.axvspan(threshold, hi, alpha=0.12, color=non_member_color)
    ax.axvline(x=threshold, color=neutral_color, linewidth=2, linestyle='-', zorder=3)
    ax.text(threshold, 1.12, 'Threshold', ha='center', va='bottom', fontsize=10,
            fontweight='bold', color=neutral_color, transform=xform)

    # Dashed reference lines at the two population means, labelled below the axis.
    for mean_val, color, label in (
            (m_mean, member_color, f'Member\n({m_mean:.4f})'),
            (nm_mean, non_member_color, f'Non-Member\n({nm_mean:.4f})')):
        ax.axvline(x=mean_val, color=color, linewidth=1.2, linestyle='--', alpha=0.6)
        ax.text(mean_val, -0.3, label, ha='center', va='top',
                fontsize=8, color=color, transform=xform)

    # Marker for the sample's loss, colored by which side of the threshold it lands on.
    predicted_member = loss_val < threshold
    marker_color = member_color if predicted_member else non_member_color
    ax.plot(loss_val, 0.5, marker='v', markersize=16, color=marker_color, zorder=5,
            transform=xform)
    ax.text(loss_val, 0.78, f'Loss={loss_val:.4f}', ha='center', va='bottom', fontsize=11,
            fontweight='bold', color=marker_color, transform=xform,
            bbox=dict(boxstyle='round,pad=0.3', facecolor='white',
                      edgecolor=marker_color, alpha=0.95))

    # Faint zone captions centered within each shaded region.
    member_center = (lo + threshold) / 2
    non_member_center = (threshold + hi) / 2
    ax.text(member_center, 0.5, 'Member Zone', ha='center', va='center', fontsize=11,
            color=member_color, fontweight='bold', alpha=0.5, transform=xform)
    ax.text(non_member_center, 0.5, 'Non-Member Zone', ha='center', va='center', fontsize=11,
            color=non_member_color, fontweight='bold', alpha=0.5, transform=xform)

    ax.set_xlim(lo, hi)
    ax.set_yticks([])
    for spine_name in ['top', 'right', 'left']:
        ax.spines[spine_name].set_visible(False)
    ax.set_xlabel('Loss Value', fontsize=10)
    plt.tight_layout()
    return fig
265
 
 
288
  "基线模型 (Baseline)": "baseline",
289
  "标签平滑模型 (e=0.02)": "smooth_0.02",
290
  "标签平滑模型 (e=0.2)": "smooth_0.2",
291
+ "输出扰动 (s=0.01)": "perturbation_0.01",
292
+ "输出扰动 (s=0.015)": "perturbation_0.015",
293
+ "输出扰动 (s=0.02)": "perturbation_0.02",
294
  }
295
 
296
 
297
  def run_mia_demo(sample_index, data_type, model_choice):
298
+ is_member = (data_type == "成员数据(训练集)")
299
  data = member_data if is_member else non_member_data
300
  idx = min(int(sample_index), len(data) - 1)
301
  sample = data[idx]
302
 
303
  model_key = MODEL_CHOICE_MAP.get(model_choice, "baseline")
 
304
 
305
+ # Determine which Loss data to use
306
+ is_perturb = model_key.startswith("perturbation_")
307
+ if is_perturb:
308
+ # Output perturbation: baseline loss + noise
309
+ sigma = float(model_key.split("_")[1])
310
+ base_fr = full_results.get('baseline', {})
311
+ if is_member and idx < len(base_fr.get('member_losses', [])):
312
+ base_loss = base_fr['member_losses'][idx]
313
+ elif not is_member and idx < len(base_fr.get('non_member_losses', [])):
314
+ base_loss = base_fr['non_member_losses'][idx]
315
+ else:
316
+ base_loss = float(np.random.normal(bl_m_mean if is_member else bl_nm_mean, 0.02))
317
+ np.random.seed(idx * 1000 + int(sigma * 1000))
318
+ loss = base_loss + np.random.normal(0, sigma)
319
+ m_mean = bl_m_mean
320
+ nm_mean = bl_nm_mean
321
+ m_std_v = bl_m_std
322
+ nm_std_v = bl_nm_std
323
+ model_auc = perturb_results.get(model_key, {}).get('auc', 0)
324
+ display_label = "OP(s=" + str(sigma) + ")"
325
  else:
326
+ params = MODEL_PARAMS.get(model_key, MODEL_PARAMS["baseline"])
327
+ fr = full_results.get(model_key, full_results.get('baseline', {}))
328
+ if is_member and idx < len(fr.get('member_losses', [])):
329
+ loss = fr['member_losses'][idx]
330
+ elif not is_member and idx < len(fr.get('non_member_losses', [])):
331
+ loss = fr['non_member_losses'][idx]
332
+ else:
333
+ loss = float(np.random.normal(params['m_mean'] if is_member else params['nm_mean'], 0.02))
334
+ m_mean = params['m_mean']
335
+ nm_mean = params['nm_mean']
336
+ m_std_v = params['m_std']
337
+ nm_std_v = params['nm_std']
338
+ model_auc = mia_results.get(model_key, {}).get('auc', 0)
339
+ display_label = params['label']
340
 
 
 
 
 
341
  threshold = (m_mean + nm_mean) / 2.0
342
  pred_member = (loss < threshold)
343
  attack_correct = (pred_member == is_member)
344
 
345
+ gauge_fig = make_loss_gauge(loss, m_mean, nm_mean, threshold, m_std_v, nm_std_v)
346
 
347
  pred_label = "训练成员" if pred_member else "非训练成员"
348
  pred_color = "🔴" if pred_member else "🟢"
 
359
  verdict = "❌ **攻击失误**"
360
  verdict_detail = "攻击者的判定与真实身份不符。"
361
 
 
362
  result_md = (
363
  verdict + "\n\n" + verdict_detail + "\n\n"
364
+ "**当前攻击模型**: " + display_label + " (AUC=" + f"{model_auc:.4f}" + ")\n\n"
365
  "| | 攻击者计算得出 | 系统真实身份 |\n"
366
  "|---|---|---|\n"
367
  "| 判定 | " + pred_color + " " + pred_label + " | " + actual_color + " " + actual_label + " |\n"
 
383
  }
384
  .tab-nav { border-bottom: 2px solid #e1e8f0 !important; margin-bottom: 20px !important; }
385
  .tab-nav button {
386
+ font-size: 15px !important; padding: 14px 22px !important; font-weight: 500 !important;
387
  color: #64748b !important; border-radius: 8px 8px 0 0 !important;
388
  transition: all 0.3s ease !important; background: transparent !important; border: none !important;
389
  }
390
+ .tab-nav button:hover { color: #3b82f6 !important; }
391
  .tab-nav button.selected { font-weight: 700 !important; color: #2563eb !important; border-bottom: 3px solid #2563eb !important; }
392
+ .tabitem { background: #fff !important; border-radius: 12px !important; box-shadow: 0 4px 20px rgba(0,0,0,0.04) !important; padding: 30px !important; border: 1px solid #e2e8f0 !important; }
393
+ .prose h1 { font-size: 2rem !important; color: #0f172a !important; font-weight: 800 !important; text-align: center !important; }
394
+ .prose h2 { font-size: 1.35rem !important; color: #1e293b !important; margin-top: 1.5em !important; padding-bottom: 0.4em !important; border-bottom: 2px solid #f1f5f9 !important; font-weight: 700 !important; }
395
+ .prose h3 { font-size: 1.1rem !important; color: #334155 !important; font-weight: 600 !important; }
396
+ .prose table { width: 100% !important; border-collapse: separate !important; border-spacing: 0 !important; margin: 1.2em 0 !important; border-radius: 10px !important; overflow: hidden !important; box-shadow: 0 0 0 1px #e2e8f0, 0 4px 6px -1px rgba(0,0,0,0.05) !important; font-size: 0.9rem !important; }
397
+ .prose th { background: #f8fafc !important; color: #475569 !important; font-weight: 600 !important; padding: 10px 14px !important; border-bottom: 2px solid #e2e8f0 !important; }
 
 
 
398
  .prose tr:nth-child(even) td { background: #f8fafc !important; }
399
+ .prose td { padding: 9px 14px !important; color: #334155 !important; border-bottom: 1px solid #e2e8f0 !important; }
400
  .prose tr:last-child td { border-bottom: none !important; }
401
+ .prose blockquote { border-left: 4px solid #3b82f6 !important; background: linear-gradient(to right,#eff6ff,#fff) !important; padding: 14px 18px !important; border-radius: 0 8px 8px 0 !important; color: #1e40af !important; margin: 1.2em 0 !important; }
 
402
  button.primary { background: linear-gradient(135deg,#3b82f6 0%,#2563eb 100%) !important; border: none !important; box-shadow: 0 4px 12px rgba(37,99,235,0.25) !important; font-weight: 600 !important; }
 
403
  footer { display: none !important; }
404
  """
405
 
 
414
  with gr.Tab("项目概览"):
415
  gr.Markdown(
416
  "## 研究背景\n\n"
417
+ "大语言模型在教育领域广泛应用,训练过程中不可避免地接触到学生敏感数据。"
418
+ "**成员推理攻击 (MIA)** 能判断某条数据是否参与了模型训练,构成隐私威胁。\n\n"
 
419
  "---\n\n"
420
  "## 实验设计\n\n"
421
  "| 阶段 | 内容 | 方法 |\n"
 
423
  "| 1. 数据准备 | 2000条小学数学辅导对话 | 模板化生成,含姓名/学号/成绩等隐私字段 |\n"
424
  "| 2. 基线模型训练 | Qwen2.5-Math-1.5B + LoRA | 标准微调,无任何防御措施 |\n"
425
  "| 3. 标签平滑模型训练 | 两组不同平滑系数 | e=0.02(温和) 与 e=0.2(强力) 分别训练 |\n"
426
+ "| 4. MIA攻击测试 | 对全部模型及策略发起攻击 | 三个模型Loss-based攻击 + 三组输出扰动测试 |\n"
427
+ "| 5. 效用评估 | 300道数学测试 | 三个模型分别测试准确率 |\n"
428
+ "| 6. 综合分析 | 隐私-效用权衡 | 散点图 + 定量对比 |\n\n"
 
429
  "---\n\n"
430
  "## 实验配置\n\n"
431
  "| 项目 | 值 |\n"
 
434
  "| 微调方法 | LoRA (r=8, alpha=16, target: q/k/v/o_proj) |\n"
435
  "| 训练轮数 | 10 epochs |\n"
436
  "| 数据总量 | " + data_size_str + " 条 (成员1000 + 非成员1000) |\n"
437
+ "| 训练模型数 | 3个 (基线 + 标签平滑x2) |\n"
438
+ "| 输出扰动测试 | 3组 (s=0.01/0.015/0.02,在基线模型上) |\n")
439
 
440
  with gr.Tab("数据展示"):
441
  gr.Markdown("## 数据集概况\n\n"
442
+ "成员数据1000条(训练集)与非成员数据1000条(对照组),每条均包含学生隐私字段。\n\n"
443
+ "### 任务类型分布\n\n"
444
+ "| 类型 | 数量 | 占比 | 说明 |\n"
445
+ "|------|------|------|------|\n"
446
+ "| 基础计算 | 800 | 40% | 加减乘除等基本运算 |\n"
447
+ "| 应用题 | 600 | 30% | 实际场景的数学问题 |\n"
448
+ "| 概念问答 | 400 | 20% | 数学概念理解 |\n"
449
+ "| 错题订正 | 200 | 10% | 常见错误分析纠正 |\n")
450
  with gr.Row():
 
 
451
  with gr.Column(scale=1):
452
  gr.Markdown("**选择靶向数据池**")
453
  data_sel = gr.Radio(["成员数据(训练集)", "非成员数据(测试集)"],
454
  value="成员数据(训练集)", label="")
455
  sample_btn = gr.Button("随机提取", variant="primary")
456
  sample_info = gr.Markdown()
457
+ with gr.Column(scale=1):
458
+ gr.Markdown("**原始对话内容**")
459
+ sample_q = gr.Textbox(label="学生提问 (Prompt)", lines=5, interactive=False)
460
+ sample_a = gr.Textbox(label="模型回答 (Ground Truth)", lines=5, interactive=False)
461
  sample_btn.click(show_random_sample, [data_sel], [sample_info, sample_q, sample_a])
462
 
463
  with gr.Tab("MIA攻击演示"):
464
  gr.Markdown(
465
  "## 发起成员推理攻击\n\n"
466
+ "选择攻击目标(模型或防御策略),系统将计算该样本的Loss值并判定成员身份。\n")
467
  with gr.Row():
468
  with gr.Column(scale=1):
469
  atk_model = gr.Radio(
470
+ ["基线模型 (Baseline)", "标签平滑模型 (e=0.02)", "标签平滑模型 (e=0.2)",
471
+ "输出扰动 (s=0.01)", "输出扰动 (s=0.015)", "输出扰动 (s=0.02)"],
472
+ value="基线模型 (Baseline)", label="选择攻击目标")
473
  atk_type = gr.Radio(["成员数据(训练集)", "非成员数据(测试集)"],
474
  value="成员数据(训练集)", label="模拟真实数据来源")
475
  atk_idx = gr.Slider(0, 999, step=1, value=0, label="样本游标 ID (0-999)")
 
484
  with gr.Tab("防御对比"):
485
  gr.Markdown(
486
  "## 防御策略效果对比\n\n"
 
487
  "| 策略 | 类型 | 原理 | 实验验证的优势 | 实验观察到的局限 |\n"
488
  "|------|------|------|---------------|----------------|\n"
489
+ "| 标签平滑 | 训练期 | 软化训练标签,抑制过度记忆 | e=0.02时AUC降至" + f"{s002_auc:.4f}" + ",准确率" + f"{s002_acc:.1f}" + "% | 需重新训练;e过大时可能影响效用 |\n"
490
+ "| 输出扰动 | 推理期 | 对Loss添加高斯噪声 | s=0.02时AUC降至" + f"{op002_auc:.4f}" + ",准确率不变 | 仅遮蔽Loss统计信号,不改变模型记忆 |\n")
491
+ gr.Markdown("### AUC对比(全部策略)")
492
+ gr.Plot(value=make_auc_bar())
493
+ gr.Markdown("### Loss分布对比 - 三个模型(训练期防御效果)")
494
+ gr.Plot(value=make_loss_distribution())
495
+ gr.Markdown("### Loss分布对比 - 输出扰动(推理期防御效果)")
496
+ gr.Plot(value=make_perturb_loss_distribution())
 
 
497
 
498
  tbl = (
499
  "### 完整实验结果\n\n"
 
518
  gr.Markdown(
519
  "## 一、标签平滑 (Label Smoothing)\n\n"
520
  "**类型**: 训练期防御\n\n"
521
+ "将训练标签从硬标签 (one-hot) 转换为软标签,降低模型对训练样本的过度拟合。\n\n"
522
+ "**公式**: y_smooth = (1 - e) * y_onehot + e / V\n\n"
523
+ "其中 e 为平滑系数,V 为词汇表大小。当 e=0 时退化为标准训练。\n\n"
 
524
  "| 参数 | AUC | 准确率 | 分析 |\n"
525
  "|------|-----|--------|------|\n"
526
  "| 基线 (e=0) | " + f"{bl_auc:.4f}" + " | " + f"{bl_acc:.1f}" + "% | 无防御,攻击风险较高 |\n"
 
529
  "---\n\n"
530
  "## 二、输出扰动 (Output Perturbation)\n\n"
531
  "**类型**: 推理期防御\n\n"
532
+ "在推理阶段对模型返回的Loss值注入高斯噪声,使攻击者难以区分成员与非成员。\n\n"
533
+ "**公式**: L_perturbed = L_original + N(0, s^2)\n\n"
534
+ "其中 s 为噪声标准差,控制扰动强度。\n\n"
535
  "| 参数 | AUC | AUC降幅 | 准确率 |\n"
536
  "|------|-----|---------|--------|\n"
537
  "| 基线 (s=0) | " + f"{bl_auc:.4f}" + " | -- | " + f"{bl_acc:.1f}" + "% |\n"
 
545
  "| 作用阶段 | 训练期 | 推理期 |\n"
546
  "| 是否需要重训 | 是 | 否 |\n"
547
  "| 对效用的影响 | 取决于平滑系数 | 无影响 |\n"
548
+ "| 防御原理 | 抑制过拟合,降低记忆 | 遮蔽Loss统计信号 |\n"
549
+ "| 部署难度 | 需训练阶段介入 | 推理阶段即插即用 |\n")
 
550
 
551
  with gr.Tab("效用评估"):
552
+ gr.Markdown("## 效用评估\n\n> 测试集: 300道数学题\n")
 
 
553
  with gr.Row():
554
  with gr.Column():
555
  gr.Markdown("### 准确率对比")
 
557
  with gr.Column():
558
  gr.Markdown("### 隐私-效用权衡")
559
  gr.Plot(value=make_tradeoff())
 
560
  gr.Markdown(
561
  "### 效用分析\n\n"
562
  "| 策略 | 准确率 | AUC | 效用变化 | 分析 |\n"
563
  "|------|--------|-----|---------|------|\n"
564
+ "| 基线 | " + f"{bl_acc:.1f}" + "% | " + f"{bl_auc:.4f}" + " | -- | 效用基准,隐私风险最高 |\n"
565
+ "| LS(e=0.02) | " + f"{s002_acc:.1f}" + "% | " + f"{s002_auc:.4f}" + " | " + f"{s002_acc-bl_acc:+.1f}" + "pp | 适度正则化提升泛化,准确率上升 |\n"
566
+ "| LS(e=0.2) | " + f"{s02_acc:.1f}" + "% | " + f"{s02_auc:.4f}" + " | " + f"{s02_acc-bl_acc:+.1f}" + "pp | 防御增强,效用仍可接受 |\n"
567
  "| OP(s=0.01) | " + f"{bl_acc:.1f}" + "% | " + f"{op001_auc:.4f}" + " | 0 | 零效用损失 |\n"
568
  "| OP(s=0.015) | " + f"{bl_acc:.1f}" + "% | " + f"{op0015_auc:.4f}" + " | 0 | 零效用损失 |\n"
569
  "| OP(s=0.02) | " + f"{bl_acc:.1f}" + "% | " + f"{op002_auc:.4f}" + " | 0 | 零效用损失 |\n\n"
570
+ "> **关键发现**: 标签平滑 e=0.02 因正则化效应反而提升了泛化能力。"
571
+ "输出扰动在不影响效用的前提下实现了有效防御。两类策略在效用维度上呈现互补特性。\n")
 
572
 
573
  with gr.Tab("实验结果可视化"):
574
  gr.Markdown("## 实验核心图表")
575
+ for fn, cap in [("fig1_loss_distribution_comparison.png", "图1: 成员与非成员Loss分布对比"),
576
+ ("fig2_privacy_utility_tradeoff_fixed.png", "图2: 隐私风险与模型效用权衡分析"),
577
  ("fig3_defense_comparison_bar.png", "图3: 各防御策略MIA攻击AUC对比")]:
578
  p = os.path.join(BASE_DIR, "figures", fn)
579
  if os.path.exists(p):
580
  gr.Markdown("### " + cap)
581
+ gr.Image(value=p, show_label=False, height=450)
582
  gr.Markdown("---")
583
 
584
  with gr.Tab("研究结论"):
585
  gr.Markdown(
586
+ "## 研究结论\n\n---\n\n"
 
587
  "### 一、教育大模型面临显著的成员推理攻击风险\n\n"
588
+ "基线模型AUC = **" + f"{bl_auc:.4f}" + "**显著高于随机猜测 (0.5)。"
589
+ "成员平均Loss (" + f"{bl_m_mean:.4f}" + ") 低于非成员 (" + f"{bl_nm_mean:.4f}" + ")"
590
+ ",表明模型对训练数据产生了可被利用的记忆效应。\n\n---\n\n"
591
+ "### 二、标签平滑的有效性与局限性\n\n"
592
+ "- e=0.02: AUC " + f"{bl_auc:.4f}" + " -> " + f"{s002_auc:.4f}" + ",准确率 " + f"{s002_acc:.1f}" + "%\n"
593
+ "- e=0.2: AUC " + f"{bl_auc:.4f}" + " -> " + f"{s02_auc:.4f}" + ",准确率 " + f"{s02_acc:.1f}" + "%\n\n"
594
+ "平滑系数需在保护强度与效用之间权衡,e=0.02取得较好平衡。\n\n---\n\n"
595
+ "### 三、输出扰动的独特优势\n\n"
596
+ "| 参数 | AUC | AUC降幅 | 准确率 |\n"
597
+ "|------|-----|---------|--------|\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
598
  "| s=0.01 | " + f"{op001_auc:.4f}" + " | " + f"{bl_auc-op001_auc:.4f}" + " | " + f"{bl_acc:.1f}" + "% (不变) |\n"
599
  "| s=0.015 | " + f"{op0015_auc:.4f}" + " | " + f"{bl_auc-op0015_auc:.4f}" + " | " + f"{bl_acc:.1f}" + "% (不变) |\n"
600
  "| s=0.02 | " + f"{op002_auc:.4f}" + " | " + f"{bl_auc-op002_auc:.4f}" + " | " + f"{bl_acc:.1f}" + "% (不变) |\n\n"
601
+ "输出扰动在实现防御的同时零效用损失,适合已部署系统的后期隐私加固。\n\n---\n\n"
 
 
 
602
  "### 四、隐私-效用权衡的定量分析\n\n"
603
+ "| 策略 | AUC | 准确率 | AUC变化 | 效用变化 |\n"
604
+ "|------|-----|--------|--------|--------|\n"
605
+ "| 基线 | " + f"{bl_auc:.4f}" + " | " + f"{bl_acc:.1f}" + "% | -- | -- |\n"
606
+ "| LS e=0.02 | " + f"{s002_auc:.4f}" + " | " + f"{s002_acc:.1f}" + "% | " + f"{s002_auc-bl_auc:+.4f}" + " | " + f"{s002_acc-bl_acc:+.1f}" + "pp |\n"
607
+ "| LS e=0.2 | " + f"{s02_auc:.4f}" + " | " + f"{s02_acc:.1f}" + "% | " + f"{s02_auc-bl_auc:+.4f}" + " | " + f"{s02_acc-bl_acc:+.1f}" + "pp |\n"
608
+ "| OP s=0.01 | " + f"{op001_auc:.4f}" + " | " + f"{bl_acc:.1f}" + "% | " + f"{op001_auc-bl_auc:+.4f}" + " | 0 |\n"
609
+ "| OP s=0.015 | " + f"{op0015_auc:.4f}" + " | " + f"{bl_acc:.1f}" + "% | " + f"{op0015_auc-bl_auc:+.4f}" + " | 0 |\n"
610
+ "| OP s=0.02 | " + f"{op002_auc:.4f}" + " | " + f"{bl_acc:.1f}" + "% | " + f"{op002_auc-bl_auc:+.4f}" + " | 0 |\n\n"
611
+ "两类策略在机制上互补:标签平滑从训练阶段降低记忆,输出扰动从推理阶段遮蔽统计信号。"
612
+ "实际部署中可根据场景灵活选择或组合。\n")
613
+
614
+ gr.Markdown("---\n\n<center>教育大模型中的成员推理攻击及其防御思路研究</center>\n")
 
 
 
 
615
 
616
  demo.launch()