Spaces:

xiaohy
/

MathTutor-MIA-Demo

Sleeping

App Files Files Community

xiaohy committed on 11 days ago

Commit

72d2d35

verified ·

1 Parent(s): e6788b5

Create app.py

Browse files

Files changed (1) hide show

app.py +963 -0

app.py ADDED Viewed

	@@ -0,0 +1,963 @@

+# ================================================================
+# 🎓 教育大模型中的成员推理攻击及其防御研究
+#    Membership Inference Attack & Defense in Educational LLMs
+# ================================================================
+# 部署平台：Hugging Face Spaces (永久免费)
+# SDK：Gradio
+# 硬件：CPU basic (Free) — 不需要 GPU
+# ================================================================
+import os
+import json
+import numpy as np
+import matplotlib
+matplotlib.use('Agg')
+import matplotlib.pyplot as plt
+import gradio as gr
+# ========================================
+# 1. Load all data
+# ========================================
+# Absolute directory of this script; load_json joins its relative paths onto it.
+BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+def load_json(relative_path):
+    """安全加载 JSON 文件"""
+    path = os.path.join(BASE_DIR, relative_path)
+    if not os.path.exists(path):
+        raise FileNotFoundError(f"文件不存在: {path}")
+    with open(path, 'r', encoding='utf-8') as f:
+        return json.load(f)
+# Training / test data (member = training set, non-member = held-out set)
+member_data = load_json("data/member.json")
+non_member_data = load_json("data/non_member.json")
+# Experiment results
+mia_results = load_json("results/mia_results.json")
+utility_results = load_json("results/utility_results.json")
+perturb_results = load_json("results/perturbation_results.json")
+full_results = load_json("results/mia_full_results.json")
+# Project configuration
+config = load_json("config.json")
+# Font settings (for compatibility with the Spaces environment)
+plt.rcParams['font.sans-serif'] = ['DejaVu Sans', 'Arial', 'sans-serif']
+plt.rcParams['axes.unicode_minus'] = False
+# ========================================
+# 2. 图表生成函数
+# ========================================
+def make_pie_chart():
+    """数据集任务分布饼图"""
+    task_counts = {}
+    for item in member_data + non_member_data:
+        t = item.get('task_type', 'unknown')
+        task_counts[t] = task_counts.get(t, 0) + 1
+    name_map = {
+        'calculation': 'Calculation (40%)',
+        'word_problem': 'Word Problem (30%)',
+        'concept': 'Concept Q&A (20%)',
+        'error_correction': 'Error Correction (10%)'
+    }
+    labels = [name_map.get(k, k) for k in task_counts]
+    sizes = list(task_counts.values())
+    colors = ['#FF6B6B', '#4ECDC4', '#45B7D1', '#96CEB4']
+    fig, ax = plt.subplots(figsize=(8, 6))
+    wedges, texts, autotexts = ax.pie(
+        sizes,
+        labels=labels,
+        autopct='%1.1f%%',
+        colors=colors[:len(labels)],
+        explode=[0.04] * len(labels),
+        shadow=True,
+        startangle=90,
+        textprops={'fontsize': 11}
+    )
+    for t in autotexts:
+        t.set_fontsize(12)
+        t.set_fontweight('bold')
+    ax.set_title(
+        'Dataset Task Distribution (2000 samples)',
+        fontsize=15, fontweight='bold', pad=15
+    )
+    plt.tight_layout()
+    return fig
+def make_loss_distribution():
+    """Loss 分布直方图（使用真实 loss 数据）"""
+    plot_items = []
+    for k, t in [('baseline', 'Baseline'),
+                 ('smooth_0.02', 'Label Smoothing e=0.02'),
+                 ('smooth_0.2', 'Label Smoothing e=0.2')]:
+        if k in full_results:
+            auc = mia_results.get(k, {}).get('auc', 0)
+            plot_items.append((k, f"{t}\nAUC = {auc:.4f}"))
+    n = len(plot_items)
+    if n == 0:
+        fig, ax = plt.subplots()
+        ax.text(0.5, 0.5, 'No data available', ha='center', va='center')
+        return fig
+    fig, axes = plt.subplots(1, n, figsize=(6 * n, 5))
+    if n == 1:
+        axes = [axes]
+    for ax, (k, title) in zip(axes, plot_items):
+        m_losses = full_results[k]['member_losses']
+        nm_losses = full_results[k]['non_member_losses']
+        all_losses = m_losses + nm_losses
+        bins = np.linspace(min(all_losses), max(all_losses), 40)
+        ax.hist(m_losses, bins=bins, alpha=0.55, color='#4A90D9',
+                label=f'Members (u={np.mean(m_losses):.3f})', density=True)
+        ax.hist(nm_losses, bins=bins, alpha=0.55, color='#E74C3C',
+                label=f'Non-Members (u={np.mean(nm_losses):.3f})', density=True)
+        ax.set_title(title, fontsize=13, fontweight='bold')
+        ax.set_xlabel('Loss Value', fontsize=11)
+        ax.set_ylabel('Density', fontsize=11)
+        ax.legend(fontsize=9)
+        ax.grid(True, linestyle='--', alpha=0.4)
+    plt.suptitle(
+        'Member vs Non-Member Loss Distribution',
+        fontsize=16, fontweight='bold', y=1.02
+    )
+    plt.tight_layout()
+    return fig
+def make_auc_bar():
+    """所有防御策略 AUC 柱状图"""
+    methods = []
+    aucs = []
+    colors = []
+    # MIA 模型结果
+    for k, name, c in [
+        ('baseline', 'Baseline', '#95A5A6'),
+        ('smooth_0.02', 'LS e=0.02', '#5B9BD5'),
+        ('smooth_0.2', 'LS e=0.2', '#2E5FA1'),
+    ]:
+        if k in mia_results:
+            methods.append(name)
+            aucs.append(mia_results[k]['auc'])
+            colors.append(c)
+    # 输出扰动结果
+    for k, name, c in [
+        ('perturbation_0.01', 'OP s=0.01', '#27AE60'),
+        ('perturbation_0.015', 'OP s=0.015', '#1E8449'),
+        ('perturbation_0.02', 'OP s=0.02', '#145A32'),
+    ]:
+        if k in perturb_results:
+            methods.append(name)
+            aucs.append(perturb_results[k]['auc'])
+            colors.append(c)
+    fig, ax = plt.subplots(figsize=(11, 6))
+    bars = ax.bar(
+        methods, aucs, color=colors, width=0.55,
+        edgecolor='white', linewidth=1.5
+    )
+    # 数值标签
+    for bar, auc_val in zip(bars, aucs):
+        ax.text(
+            bar.get_x() + bar.get_width() / 2,
+            bar.get_height() + 0.004,
+            f'{auc_val:.3f}',
+            ha='center', va='bottom', fontsize=13, fontweight='bold'
+        )
+    # 参考线
+    baseline_auc = mia_results.get('baseline', {}).get('auc', 0.63)
+    ax.axhline(y=0.5, color='red', linestyle='--', linewidth=2,
+               label='Random Guess (AUC=0.5)')
+    ax.axhline(y=baseline_auc, color='black', linestyle=':',
+               linewidth=1.5, label='Baseline Risk')
+    ax.set_ylabel('MIA Attack AUC', fontsize=13)
+    ax.set_title(
+        'Comparison of All Defense Mechanisms',
+        fontsize=15, fontweight='bold'
+    )
+    ax.set_ylim(0.45, max(aucs) + 0.06 if aucs else 1.0)
+    ax.legend(fontsize=11)
+    ax.grid(axis='y', linestyle='--', alpha=0.4)
+    plt.xticks(rotation=10)
+    plt.tight_layout()
+    return fig
+def make_tradeoff():
+    """隐私-效用权衡散点图"""
+    fig, ax = plt.subplots(figsize=(10, 7))
+    points = []
+    # MIA 模型
+    for k, name, marker, color, sz in [
+        ('baseline', 'Baseline (No Defense)', 'o', 'black', 180),
+        ('smooth_0.02', 'Label Smoothing e=0.02', 's', '#5B9BD5', 160),
+        ('smooth_0.2', 'Label Smoothing e=0.2', 's', '#2E5FA1', 160),
+    ]:
+        if k in mia_results and k in utility_results:
+            points.append({
+                'name': name,
+                'auc': mia_results[k]['auc'],
+                'acc': utility_results[k]['accuracy'],
+                'marker': marker, 'color': color, 'size': sz
+            })
+    # 输出扰动（准确率 = 基线准确率）
+    base_acc = utility_results.get('baseline', {}).get('accuracy', 0.633)
+    for k, name, marker, color, sz in [
+        ('perturbation_0.01', 'Output Perturb s=0.01', '^', '#27AE60', 170),
+        ('perturbation_0.02', 'Output Perturb s=0.02', '^', '#145A32', 170),
+    ]:
+        if k in perturb_results:
+            points.append({
+                'name': name,
+                'auc': perturb_results[k]['auc'],
+                'acc': base_acc,
+                'marker': marker, 'color': color, 'size': sz
+            })
+    for p in points:
+        ax.scatter(
+            p['acc'], p['auc'],
+            label=p['name'], marker=p['marker'], color=p['color'],
+            s=p['size'], edgecolors='white', linewidth=1.5, zorder=5
+        )
+    ax.axhline(y=0.5, color='gray', linestyle='--', alpha=0.7,
+               label='Random Guess (AUC=0.5)')
+    ax.set_xlabel('Model Utility (Test Accuracy)', fontsize=13, fontweight='bold')
+    ax.set_ylabel('Privacy Risk (MIA AUC)', fontsize=13, fontweight='bold')
+    ax.set_title('Privacy-Utility Trade-off Analysis', fontsize=15, fontweight='bold')
+    # 自动坐标范围
+    all_acc = [p['acc'] for p in points]
+    all_auc = [p['auc'] for p in points]
+    if all_acc and all_auc:
+        ax.set_xlim(min(all_acc) - 0.03, max(all_acc) + 0.05)
+        ax.set_ylim(min(min(all_auc), 0.5) - 0.02, max(all_auc) + 0.02)
+        # 区域标注
+        ax.text(
+            min(all_acc) - 0.02, max(all_auc) + 0.01,
+            'High Risk / Low Utility', fontsize=10, color='red',
+            bbox=dict(facecolor='red', alpha=0.1)
+        )
+        ax.text(
+            max(all_acc) + 0.03, min(min(all_auc), 0.5) + 0.005,
+            'Ideal Zone', fontsize=10, color='green',
+            bbox=dict(facecolor='green', alpha=0.1)
+        )
+    ax.legend(loc='upper right', frameon=True, shadow=True, fontsize=10)
+    ax.grid(True, alpha=0.3)
+    plt.tight_layout()
+    return fig
+def make_accuracy_bar():
+    """准确率对比柱状图"""
+    names = []
+    accs = []
+    colors = []
+    for k, name, c in [
+        ('baseline', 'Baseline', '#95A5A6'),
+        ('smooth_0.02', 'LS e=0.02', '#5B9BD5'),
+        ('smooth_0.2', 'LS e=0.2', '#2E5FA1'),
+    ]:
+        if k in utility_results:
+            names.append(name)
+            accs.append(utility_results[k]['accuracy'] * 100)
+            colors.append(c)
+    # 输出扰动准确率 = 基线准确率
+    base_pct = utility_results.get('baseline', {}).get('accuracy', 0) * 100
+    for k, name, c in [
+        ('perturbation_0.01', 'OP s=0.01', '#27AE60'),
+        ('perturbation_0.02', 'OP s=0.02', '#145A32'),
+    ]:
+        if k in perturb_results:
+            names.append(name)
+            accs.append(base_pct)
+            colors.append(c)
+    fig, ax = plt.subplots(figsize=(11, 6))
+    bars = ax.bar(
+        names, accs, color=colors, width=0.5,
+        edgecolor='white', linewidth=1.5
+    )
+    for bar, acc in zip(bars, accs):
+        ax.text(
+            bar.get_x() + bar.get_width() / 2,
+            bar.get_height() + 0.8,
+            f'{acc:.1f}%',
+            ha='center', va='bottom', fontsize=13, fontweight='bold'
+        )
+    ax.set_ylabel('Accuracy (%)', fontsize=13)
+    ax.set_title('Model Utility Comparison (300 Math Questions)', fontsize=15, fontweight='bold')
+    ax.set_ylim(0, 100)
+    ax.grid(axis='y', alpha=0.3)
+    plt.xticks(rotation=10)
+    plt.tight_layout()
+    return fig
+# ========================================
+# 3. 界面回调函数
+# ========================================
+def show_random_sample(data_type):
+    """随机展示一条数据样本"""
+    if "member" in data_type.lower() or "成员" in data_type:
+        data = member_data
+    else:
+        data = non_member_data
+    sample = data[np.random.randint(0, len(data))]
+    meta = sample['metadata']
+    task_name_map = {
+        'calculation': 'Calculation (基础计算)',
+        'word_problem': 'Word Problem (应用题)',
+        'concept': 'Concept Q&A (概念问答)',
+        'error_correction': 'Error Correction (错题订正)'
+    }
+    info = f"""### 📋 Sample Metadata (Privacy Fields)
+| Field | Value |
+|-------|-------|
+| **Name (姓名)** | {meta['name']} |
+| **Student ID (学号)** | {meta['student_id']} |
+| **Class (班级)** | {meta['class']} |
+| **Score (成绩)** | {meta['score']} |
+| **Task Type** | {task_name_map.get(sample['task_type'], sample['task_type'])} |
+> ⚠️ The above are **student privacy fields** that attackers attempt to infer!
+"""
+    return info, sample['question'], sample['answer']
+def run_mia_demo(sample_index, data_type):
+    """MIA 攻击演示（使用实验中保存的真实 loss 数据）"""
+    is_member = ("Member" in data_type or "成员" in data_type)
+    idx = min(int(sample_index), 999)
+    data = member_data if is_member else non_member_data
+    sample = data[idx]
+    # 从保存的完整 loss 数据中取出对应样本的真实 loss
+    bl = full_results.get('baseline', {})
+    if is_member and idx < len(bl.get('member_losses', [])):
+        loss = bl['member_losses'][idx]
+    elif not is_member and idx < len(bl.get('non_member_losses', [])):
+        loss = bl['non_member_losses'][idx]
+    else:
+        # 兜底：用统计信息模拟
+        m_mean_fb = mia_results.get('baseline', {}).get('member_loss_mean', 0.19)
+        nm_mean_fb = mia_results.get('baseline', {}).get('non_member_loss_mean', 0.23)
+        if is_member:
+            loss = float(np.random.normal(m_mean_fb, 0.02))
+        else:
+            loss = float(np.random.normal(nm_mean_fb, 0.02))
+    # 计算阈值
+    m_mean = mia_results.get('baseline', {}).get('member_loss_mean', 0.19)
+    nm_mean = mia_results.get('baseline', {}).get('non_member_loss_mean', 0.23)
+    threshold = (m_mean + nm_mean) / 2.0
+    pred_member = (loss < threshold)
+    actual_member = is_member
+    attack_correct = (pred_member == actual_member)
+    # ===== Loss 位置可视化 =====
+    bar_total = 40
+    if nm_mean > m_mean:
+        ratio = (loss - m_mean) / (nm_mean - m_mean)
+    else:
+        ratio = 0.5
+    ratio = max(0.0, min(1.0, ratio))
+    pos = int(bar_total * ratio)
+    bar_left = "=" * pos
+    bar_right = "=" * (bar_total - pos)
+    bar_visual = bar_left + "V" + bar_right
+    # 阈值位置标记
+    threshold_pos = int(bar_total * 0.5)
+    threshold_bar = " " * threshold_pos + "|"
+    # 判定文字
+    if pred_member:
+        pred_text = "🔴 **MEMBER** (Loss < Threshold → Model is too familiar)"
+    else:
+        pred_text = "🟢 **NON-MEMBER** (Loss >= Threshold → Model is not familiar)"
+    if actual_member:
+        actual_text = "🔴 **MEMBER** (This data WAS used in training)"
+    else:
+        actual_text = "🟢 **NON-MEMBER** (This data was NOT used in training)"
+    if attack_correct and pred_member and actual_member:
+        result_text = "✅ **ATTACK SUCCESS — Privacy Leaked!**"
+        result_emoji = "⚠️"
+    elif attack_correct:
+        result_text = "✅ **Correct Judgment**"
+        result_emoji = "✅"
+    else:
+        result_text = "❌ **Attack Failed**"
+        result_emoji = "❌"
+    result_md = f"""## 🔍 MIA Attack Result
+ # ===== Build the visualization block as a separate string =====
+    viz_block = (
+        "    Member Zone (Low Loss)              Non-Member Zone (High Loss)\n"
+        "    <--------------------|----------------------->\n"
+        "                     Threshold\n"
+        "\n"
+        f"    [{bar_visual}]\n"
+        "     |                    |                     |\n"
+        "   Member Mean        Threshold           Non-Member Mean\n"
+        f"   {m_mean:.4f}             {threshold:.4f}              {nm_mean:.4f}\n"
+        "\n"
+        f"    Current Loss = {loss:.4f}\n"
+        f"    Position: {position_text}\n"
+    )
+    # ===== Build the warning/safe message =====
+    if pred_member:
+        warning_msg = (
+            f"⚠️ **Privacy Risk!** This sample Loss = {loss:.4f} "
+            f"is BELOW the threshold ({threshold:.4f}). "
+            "The model 'remembers' this data — student privacy may be compromised!"
+        )
+    else:
+        warning_msg = (
+            f"✅ This sample Loss = {loss:.4f} "
+            f"is ABOVE the threshold ({threshold:.4f}). "
+            "The model shows no special memorization of this data."
+        )
+    # ===== Assemble the final Markdown =====
+    result_md = (
+        "## 🔍 MIA Attack Result\n\n"
+        "### 📊 Loss Computation\n\n"
+        "| Metric | Value |\n"
+        "|--------|-------|\n"
+        f"| **Sample Loss** | `{loss:.6f}` |\n"
+        f"| **Decision Threshold** | `{threshold:.6f}` |\n"
+        f"| **Member Mean Loss** | `{m_mean:.6f}` |\n"
+        f"| **Non-Member Mean Loss** | `{nm_mean:.6f}` |\n\n"
+        "### 📏 Loss Position Visualization\n\n"
+        "```\n"
+        f"{viz_block}"
+        "```\n\n"
+        "### 🎯 Attack Judgment\n\n"
+        "| Item | Result |\n"
+        "|------|--------|\n"
+        f"| **Attacker Prediction** | {result_icon} {pred_text} |\n"
+        f"| **Actual Identity** | {actual_text} |\n"
+        f"| **Attack Outcome** | {result_icon} **{result_text}** |\n\n"
+        "### 💡 How It Works\n\n"
+        "The model produces **lower Loss** on data it was **trained on** "
+        "(it's more \"confident\"). The attacker exploits this statistical difference:\n\n"
+        f"- Loss **below** threshold `{threshold:.4f}` → Predicted as **training member** → ⚠️ Privacy risk\n"
+        f"- Loss **above** threshold `{threshold:.4f}` → Predicted as **non-member** → ✅ Relatively safe\n\n"
+        f"{warning_msg}\n\n"
+        "> 📌 *This demo uses real Loss values saved from the experiment (not real-time inference).*\n"
+    )
+    question_display = f"**📝 Sample #{idx}:**\n\n{sample['question'][:600]}"
+    return question_display, result_md
+### 🎯 Attack Judgment
+| Item | Result |
+|------|--------|
+| **Attacker Prediction** | {pred_text} |
+| **Actual Identity** | {actual_text} |
+| **Attack Outcome** | {result_emoji} {result_text} |
+### 💡 How It Works
+The model produces **lower Loss** on data it was **trained on** (it's more "confident").
+The attacker exploits this statistical difference:
+- Loss **below** threshold `{threshold:.4f}` → Predicted as **training member** → ⚠️ Privacy risk
+- Loss **above** threshold `{threshold:.4f}` → Predicted as **non-member** → ✅ Relatively safe
+{"⚠️ **Privacy Risk!** This sample's Loss = " + f"{loss:.4f}" + " is BELOW the threshold. The model 'remembers' this data — student privacy may be compromised!" if pred_member else "✅ This sample's Loss = " + f"{loss:.4f}" + " is ABOVE the threshold. The model shows no special memorization of this data."}
+> 📌 *This demo uses real Loss values saved from the experiment (not real-time inference).*
+"""
+    question_display = f"**📝 Sample #{idx}:**\n\n{sample['question'][:600]}"
+    return question_display, result_md
+# ========================================
+# 4. 构建完整 Gradio 界面
+# ========================================
+# CSS passed to gr.Blocks: caps the page width, enlarges tab buttons, hides the footer.
+custom_css = """
+.gradio-container {
+    max-width: 1280px !important;
+    margin: auto !important;
+}
+.tab-nav button {
+    font-size: 15px !important;
+    padding: 10px 18px !important;
+    font-weight: 600 !important;
+}
+footer {
+    display: none !important;
+}
+"""
+# Pull out commonly used values up front (avoids errors inside the Markdown f-strings).
+# Missing entries fall back to 0 rather than raising KeyError.
+bl_auc = mia_results.get('baseline', {}).get('auc', 0)
+s002_auc = mia_results.get('smooth_0.02', {}).get('auc', 0)
+s02_auc = mia_results.get('smooth_0.2', {}).get('auc', 0)
+op001_auc = perturb_results.get('perturbation_0.01', {}).get('auc', 0)
+op0015_auc = perturb_results.get('perturbation_0.015', {}).get('auc', 0)
+op002_auc = perturb_results.get('perturbation_0.02', {}).get('auc', 0)
+# Accuracies are converted to percent for display.
+bl_acc = utility_results.get('baseline', {}).get('accuracy', 0) * 100
+s002_acc = utility_results.get('smooth_0.02', {}).get('accuracy', 0) * 100
+s02_acc = utility_results.get('smooth_0.2', {}).get('accuracy', 0) * 100
+# Values from config.json, each with a literal default when the key is absent.
+model_name_str = config.get('model_name', 'Qwen/Qwen2.5-Math-1.5B-Instruct')
+gpu_name_str = config.get('gpu_name', 'T4')
+data_size_str = config.get('data_size', 2000)
+setup_date_str = config.get('setup_date', 'N/A')
+# Top-level Blocks container; the header, tabs and footer below are all built inside it.
+with gr.Blocks(
+    title="Education LLM Privacy Attack & Defense",
+    theme=gr.themes.Soft(
+        primary_hue="blue",
+        secondary_hue="sky",
+        neutral_hue="slate"
+    ),
+    css=custom_css
+) as demo:
+    # ============================
+    # Header
+    # ============================
+    # NOTE: this f-string contains no placeholders; the text is static.
+    gr.Markdown(f"""
+# 🎓 Membership Inference Attack & Defense in Educational LLMs
+### 教育大模型中的成员推理攻击及其防御研究
+---
+> **Goal**: Investigate privacy leakage risks in educational LLMs and evaluate **Label Smoothing** + **Output Perturbation** as defense strategies.
+> **Tech Stack**: `Qwen2.5-Math-1.5B` · `LoRA Fine-tuning` · `Loss-based MIA` · `Label Smoothing` · `Output Perturbation`
+    """)
+    # ============================
+    # Tab 1: Project Overview
+    # ============================
+    # Static project summary; only the base model, dataset size, GPU and date
+    # rows are filled from the values read out of config.json.
+    with gr.Tab("🏠 Project Overview"):
+        overview_md = (
+            "## 📖 Research Background\n\n"
+            "As LLMs are increasingly deployed in education (tutoring systems, personalized learning),\n"
+            "they inevitably process student **private data** (names, IDs, grades).\n\n"
+            "**Membership Inference Attack (MIA)** can determine whether a data sample was used to train\n"
+            "the model, potentially exposing student privacy.\n\n"
+            "---\n\n"
+            "## 🔬 Research Design\n\n"
+            "| Phase | Content | Details |\n"
+            "|-------|---------|--------|\n"
+            "| 📂 Data | 2000 math tutoring dialogues | Contains names, student IDs, grades |\n"
+            "| 🧠 Training | Qwen2.5-Math + LoRA | Baseline + 2 label smoothing models |\n"
+            "| ⚔️ Attack | Loss-based MIA | Classify members by output loss |\n"
+            "| 🛡️ Train-time Defense | Label Smoothing (e=0.02, 0.2) | Regularization during training |\n"
+            "| 🛡️ Inference-time Defense | Output Perturbation (s=0.01~0.02) | Add noise at inference |\n"
+            "| 📊 Evaluation | Privacy-Utility Trade-off | AUC + Accuracy |\n\n"
+            "---\n\n"
+            "## ⚙️ Experiment Configuration\n\n"
+            "| Item | Value |\n"
+            "|------|-------|\n"
+            f"| **Base Model** | {model_name_str} |\n"
+            "| **Fine-tuning** | LoRA (r=8, alpha=16) |\n"
+            "| **Training Epochs** | 10 |\n"
+            f"| **Dataset Size** | {data_size_str} (1000 member + 1000 non-member) |\n"
+            f"| **GPU** | {gpu_name_str} |\n"
+            f"| **Date** | {setup_date_str} |\n\n"
+            "---\n\n"
+            "## 📐 Technical Pipeline\n\n"
+            "```\n"
+            "+-------------+     +-------------------+     +-----------+     +-------------------+     +------------+\n"
+            "| Data Gen    | --> | Baseline Training | --> | MIA Attack| --> | Defense Deploy    | --> | Evaluation |\n"
+            "| (2000)      |     | (LoRA fine-tune)  |     | (Loss)    |     | (LS + OP)         |     | (AUC+Acc)  |\n"
+            "+-------------+     +--------+----------+     +-----------+     +-------------------+     +------------+\n"
+            "                             |                                          |\n"
+            "                             +-- Label Smoothing Training --------------+\n"
+            "                                 (e=0.02, e=0.2)\n"
+            "```\n"
+        )
+        gr.Markdown(overview_md)
+        # ============================
+    # Tab 2: Data Explorer
+    # ============================
+    # Left column: task-distribution pie chart (drawn once when the UI is built).
+    # Right column: data-source selector plus a button that shows one random sample.
+    with gr.Tab("📊 Data Explorer"):
+        gr.Markdown("""
+## 📂 Dataset Overview
+- **Member data (Training set)**: 1000 samples — used to train the model
+- **Non-member data (Test set)**: 1000 samples — NOT used in training
+- Each sample contains **student privacy**: name, student ID, class, score
+        """)
+        with gr.Row():
+            with gr.Column(scale=1):
+                gr.Markdown("### 📊 Task Distribution")
+                gr.Plot(value=make_pie_chart())
+            with gr.Column(scale=1):
+                gr.Markdown("### 🔍 Random Sample Viewer")
+                data_type_selector = gr.Radio(
+                    choices=[
+                        "Member Data (Training Set / 成员数据)",
+                        "Non-Member Data (Test Set / 非成员数据)"
+                    ],
+                    value="Member Data (Training Set / 成员数据)",
+                    label="Select Data Type"
+                )
+                sample_btn = gr.Button(
+                    "🎲 Random Sample", variant="primary"
+                )
+        # Outputs filled by show_random_sample: metadata table, question, answer.
+        sample_info = gr.Markdown()
+        with gr.Row():
+            sample_q = gr.Textbox(
+                label="📝 Student Question", lines=7, interactive=False
+            )
+            sample_a = gr.Textbox(
+                label="💡 Model Answer", lines=7, interactive=False
+            )
+        # The selected radio label is passed to the callback as its data_type argument.
+        sample_btn.click(
+            fn=show_random_sample,
+            inputs=[data_type_selector],
+            outputs=[sample_info, sample_q, sample_a]
+        )
+    # ============================
+    # Tab 3: MIA Attack Demo
+    # ============================
+    # Interactive demo: choose a data source and sample index, then run_mia_demo
+    # renders the sample text and the attack verdict.
+    with gr.Tab("⚔️ MIA Attack Demo"):
+        gr.Markdown("""
+## ⚔️ Live Membership Inference Attack
+**How it works**: The model produces **lower Loss** on training data (it's more "confident").
+The attacker uses a **threshold** on Loss to predict membership.
+### 📌 Steps:
+1️⃣ Select data source (Member / Non-Member)
+2️⃣ Choose a sample index (0-999)
+3️⃣ Click **"Run Attack"** to see the result
+        """)
+        with gr.Row():
+            with gr.Column(scale=1):
+                atk_data_type = gr.Radio(
+                    choices=[
+                        "Member Data (成员数据)",
+                        "Non-Member Data (非成员数据)"
+                    ],
+                    value="Member Data (成员数据)",
+                    label="📂 Data Source"
+                )
+                # Slider range is fixed at 0-999 regardless of the loaded dataset size.
+                atk_index = gr.Slider(
+                    minimum=0, maximum=999, step=1, value=0,
+                    label="📌 Sample Index (0-999)"
+                )
+                atk_btn = gr.Button(
+                    "⚔️ Run MIA Attack",
+                    variant="primary",
+                    size="lg"
+                )
+            with gr.Column(scale=1):
+                atk_question = gr.Markdown(label="Sample Content")
+        atk_result = gr.Markdown()
+        # Input order matches run_mia_demo(sample_index, data_type).
+        atk_btn.click(
+            fn=run_mia_demo,
+            inputs=[atk_index, atk_data_type],
+            outputs=[atk_question, atk_result]
+        )
+    # ============================
+    # Tab 4: Defense Comparison
+    # ============================
+    with gr.Tab("🛡️ Defense Comparison"):
+        gr.Markdown("""
+## 🛡️ Defense Strategy Comparison
+| Strategy | Type | Mechanism | Pros | Cons |
+|----------|------|-----------|------|------|
+| **Label Smoothing** | Train-time | Soften labels to prevent overfitting | Reduces memorization | May hurt utility |
+| **Output Perturbation** | Inference-time | Add Gaussian noise to Loss | Zero utility loss | Only masks signal |
+        """)
+        with gr.Row():
+            with gr.Column():
+                gr.Markdown("### 📊 AUC Comparison (All Defenses)")
+                gr.Plot(value=make_auc_bar())
+            with gr.Column():
+                gr.Markdown("### 📈 Loss Distribution (Baseline vs LS)")
+                gr.Plot(value=make_loss_distribution())
+        # Results table
+        gr.Markdown("### 📋 Detailed Results")
+        def risk_badge(auc_val):
+            if auc_val > 0.62:
+                return "🔴 High"
+            elif auc_val > 0.55:
+                return "🟡 Medium"
+            else:
+                return "🟢 Low"
+        table = "| Strategy | Type | AUC | Privacy Risk |\n"
+        table += "|----------|------|-----|-------------|\n"
+        for k, name, cat in [
+            ('baseline', 'Baseline (No Defense)', '—'),
+            ('smooth_0.02', 'Label Smoothing e=0.02', 'Train-time'),
+            ('smooth_0.2', 'Label Smoothing e=0.2', 'Train-time'),
+        ]:
+            if k in mia_results:
+                a = mia_results[k]['auc']
+                table += f"| {name} | {cat} | **{a:.4f}** | {risk_badge(a)} |\n"
+        for k, name in [
+            ('perturbation_0.01', 'Output Perturbation s=0.01'),
+            ('perturbation_0.015', 'Output Perturbation s=0.015'),
+            ('perturbation_0.02', 'Output Perturbation s=0.02'),
+        ]:
+            if k in perturb_results:
+                a = perturb_results[k]['auc']
+                table += f"| {name} | Inference-time | **{a:.4f}** | {risk_badge(a)} |\n"
+        gr.Markdown(table)
+    # ============================
+    # Tab 5: Output Perturbation
+    # ============================
+    # Explanation of the output-perturbation defence plus a results table; the
+    # numbers are interpolated from the precomputed bl_auc / op*_auc / bl_acc values.
+    with gr.Tab("🔊 Output Perturbation"):
+        gr.Markdown(f"""
+## 🔊 Output Perturbation Defense
+### 📌 Core Idea
+At **inference time**, add **Gaussian noise** to the model's output Loss:
+**Loss_perturbed = Loss_original + N(0, sigma^2)**
+### ✅ Key Advantage
+- **No retraining needed** (zero deployment cost)
+- **No utility loss** (accuracy stays exactly the same)
+- Noise level sigma can be tuned dynamically
+### 📊 Experiment Results
+| sigma | AUC | AUC Reduction | Accuracy | Note |
+|-------|-----|--------------|----------|------|
+| 0 (Baseline) | **{bl_auc:.4f}** | — | {bl_acc:.1f}% | No defense |
+| 0.01 | **{op001_auc:.4f}** | ↓{bl_auc - op001_auc:.4f} | {bl_acc:.1f}% (unchanged) | Mild |
+| 0.015 | **{op0015_auc:.4f}** | ↓{bl_auc - op0015_auc:.4f} | {bl_acc:.1f}% (unchanged) | Moderate |
+| 0.02 | **{op002_auc:.4f}** | ↓{bl_auc - op002_auc:.4f} | {bl_acc:.1f}% (unchanged) | **Recommended** |
+### 💡 Key Finding
+> Output Perturbation (s=0.02) reduces AUC from {bl_auc:.4f} to **{op002_auc:.4f}**
+> while keeping accuracy at **{bl_acc:.1f}%** — truly a **zero-cost defense**!
+        """)
+    # ============================
+    # Tab 6: Utility Evaluation
+    # ============================
+    with gr.Tab("📝 Utility Evaluation"):
+        gr.Markdown("""
+## 📐 Model Utility Evaluation
+> Defense must not sacrifice too much utility.
+> Test set: **300 math questions** covering calculation, word problems, and concept Q&A.
+        """)
+        with gr.Row():
+            with gr.Column():
+                gr.Markdown("### 📊 Accuracy Comparison")
+                gr.Plot(value=make_accuracy_bar())
+            with gr.Column():
+                gr.Markdown("### ⚖️ Privacy-Utility Trade-off")
+                gr.Plot(value=make_tradeoff())
+        # Utility table
+        ut = "| Strategy | Accuracy | AUC | Risk | Utility Impact |\n"
+        ut += "|----------|----------|-----|------|---------------|\n"
+        for k, name in [
+            ('baseline', 'Baseline'),
+            ('smooth_0.02', 'LS e=0.02'),
+            ('smooth_0.2', 'LS e=0.2'),
+        ]:
+            if k in utility_results and k in mia_results:
+                acc = utility_results[k]['accuracy'] * 100
+                auc = mia_results[k]['auc']
+                impact = "—" if k == 'baseline' else (
+                    "✅ Improved" if acc > bl_acc else "⚠️ Decreased"
+                )
+                ut += f"| {name} | **{acc:.1f}%** | {auc:.4f} | {risk_badge(auc)} | {impact} |\n"
+        for k, name in [
+            ('perturbation_0.01', 'OP s=0.01'),
+            ('perturbation_0.02', 'OP s=0.02'),
+        ]:
+            if k in perturb_results:
+                ut += (f"| {name} | **{bl_acc:.1f}%** | "
+                       f"{perturb_results[k]['auc']:.4f} | "
+                       f"{risk_badge(perturb_results[k]['auc'])} | ✅ No change |\n")
+        gr.Markdown(ut)
+    # ============================
+    # Tab 7: Paper Figures
+    # ============================
+    with gr.Tab("📄 Paper Figures"):
+        gr.Markdown("## 📄 Publication-Quality Figures (300 DPI)")
+        figure_items = [
+            ("fig1_loss_distribution_comparison.png",
+             "Figure 1: Loss Distribution — Baseline vs Label Smoothing"),
+            ("fig2_privacy_utility_tradeoff_fixed.png",
+             "Figure 2: Privacy-Utility Trade-off Analysis"),
+            ("fig3_defense_comparison_bar.png",
+             "Figure 3: Defense Mechanism AUC Comparison"),
+        ]
+        for filename, caption in figure_items:
+            path = os.path.join(BASE_DIR, "figures", filename)
+            if os.path.exists(path):
+                gr.Markdown(f"### {caption}")
+                gr.Image(value=path, show_label=False, height=420)
+                gr.Markdown("---")
+            else:
+                gr.Markdown(
+                    f"### {caption}\n\n"
+                    f"> ⚠️ File not found: `figures/{filename}` — "
+                    f"this figure is optional."
+                )
+    # ============================
+    # Tab 8: Conclusions
+    # ============================
+    # Written conclusions; AUC and accuracy figures are interpolated from the
+    # precomputed bl_* / s002_* / s02_* / op002_auc values.
+    with gr.Tab("🎓 Conclusions"):
+        gr.Markdown(f"""
+## 📝 Research Conclusions
+---
+### 🔬 Core Findings
+#### Finding 1: MIA poses a real threat to educational LLMs
+Baseline AUC = **{bl_auc:.4f}** (significantly above random guess of 0.5).
+Attackers can infer student data membership with high probability.
+#### Finding 2: Label Smoothing effectively reduces risk
+| Strategy | AUC | Accuracy | Verdict |
+|----------|-----|----------|---------|
+| Baseline | {bl_auc:.4f} | {bl_acc:.1f}% | High privacy risk |
+| LS e=0.02 | {s002_auc:.4f} | {s002_acc:.1f}% | ✅ **Recommended** |
+| LS e=0.2 | {s02_auc:.4f} | {s02_acc:.1f}% | ⚠️ Strong defense, possible utility impact |
+#### Finding 3: Output Perturbation is a zero-cost defense
+sigma=0.02 reduces AUC from {bl_auc:.4f} to **{op002_auc:.4f}** with **zero accuracy loss**.
+#### Finding 4: Best practice — combine both defenses
+> **Recommended**: LS e=0.02 (training) + OP s=0.02 (inference) = **Dual Protection**
+---
+### 🎤 Defense Presentation Script
+> "This study uses a math tutoring scenario with Qwen2.5-Math-1.5B + LoRA fine-tuning.
+> The baseline model shows AUC={bl_auc:.4f}, indicating significant privacy leakage risk.
+> We evaluate two complementary defenses: **Label Smoothing** (train-time, e=0.02)
+> and **Output Perturbation** (inference-time, s=0.02).
+> Output perturbation achieves **zero utility loss**, making it ideal for practical deployment.
+> The study reveals the fundamental privacy-utility trade-off in educational AI."
+---
+### 📚 Innovation Points
+1. **Novel scenario** — Focus on educational LLM privacy (not general NLP)
+2. **Dual defense** — Both train-time and inference-time strategies
+3. **Practical** — Label smoothing = 1 line of code; Output perturbation = 1 line of code
+4. **Comprehensive** — Attack + Defense + Utility + Trade-off analysis
+---
+### 🔮 Future Work
+- Explore **Differential Privacy (DP-SGD)** for stronger guarantees
+- Test **Shadow Model Attack** and other advanced MIA variants
+- Validate on real educational datasets
+- Investigate **Federated Learning** for educational model privacy
+        """)
+    # ============================
+    # Footer
+    # ============================
+    gr.Markdown("""
+---
+<center>
+🎓 **Membership Inference Attack & Defense in Educational LLMs**
+`Qwen2.5-Math-1.5B` · `LoRA` · `MIA` · `Label Smoothing` · `Output Perturbation` · `Gradio`
+</center>
+    """)
+# ========================================
+# 5. Launch
+# ========================================
+demo.launch()