# NOTE(review): the three lines below were Hugging Face page-scrape residue
# (author avatar caption, commit message, commit hash), not Python source.
# They are commented out so the file parses; original text preserved:
# xianghe's picture
# update
# 96efaf9
from huggingface_hub import hf_hub_download
import gradio as gr
import pandas as pd
import numpy as np
# ๅ…จๅฑ€็ผ“ๅญ˜ๅ˜้‡
_cached_df = None
_cached_models = None
_cached_jailbreak_types = None
_cached_attack_methods = None
_cached_defense_methods = None
# Model icon URLs - ไฝฟ็”จๆททๅˆ CDN ็ญ–็•ฅ
MODEL_ICON_URLS = {
'claude': 'https://registry.npmmirror.com/@lobehub/icons-static-png/1.0.0/files/dark/claude-color.png',
'gpt': 'https://unpkg.com/@lobehub/icons-static-png@1.74.0/light/openai.png',
'gemini': 'https://registry.npmmirror.com/@lobehub/icons-static-png/1.0.0/files/dark/gemini-color.png',
'grok': 'https://registry.npmmirror.com/@lobehub/icons-static-png/1.0.0/files/dark/x-color.png',
'llama': 'https://registry.npmmirror.com/@lobehub/icons-static-png/1.0.0/files/dark/meta-color.png',
'qwen': 'https://registry.npmmirror.com/@lobehub/icons-static-png/1.0.0/files/dark/tongyi-color.png',
'deepseek': 'https://registry.npmmirror.com/@lobehub/icons-static-png/1.0.0/files/dark/deep-seek-color.png',
'glm': 'https://registry.npmmirror.com/@lobehub/icons-static-png/1.0.0/files/dark/zhipu-color.png',
'doubao': 'https://registry.npmmirror.com/@lobehub/icons-static-png/1.0.0/files/dark/doubao-color.png',
'kimi': 'https://unpkg.com/@lobehub/icons-static-png@1.74.0/light/moonshot.png',
'ernie': 'https://registry.npmmirror.com/@lobehub/icons-static-png/1.0.0/files/dark/wenxin-color.png',
'ds': 'https://registry.npmmirror.com/@lobehub/icons-static-png/1.0.0/files/dark/deep-seek-color.png',
'o3': 'https://unpkg.com/@lobehub/icons-static-png@1.74.0/light/openai.png',
'gemma': 'https://registry.npmmirror.com/@lobehub/icons-static-png/1.0.0/files/dark/gemma-color.png',
'phi': 'https://unpkg.com/@lobehub/icons-static-png@1.74.0/light/microsoft-color.png',
}
def get_model_icon_html(model_name):
    """Return an ``<img>`` tag for the vendor icon matching *model_name*.

    Matching is case-insensitive. Names beginning with ``ds-`` are treated
    as DeepSeek shorthand; otherwise the first MODEL_ICON_URLS key found as
    a substring of the name wins (dict insertion order breaks ties).
    Returns an empty string when no key matches.
    """
    lowered = model_name.lower()
    if lowered.startswith('ds-'):
        # Special-case the "DS-" prefix (DeepSeek abbreviation).
        url = MODEL_ICON_URLS.get('deepseek')
    else:
        # Keyword substring match against the icon table.
        url = next((u for k, u in MODEL_ICON_URLS.items() if k in lowered), None)
    if not url:
        return ''
    return (
        f'<img src="{url}" width="20" height="20" '
        'style="vertical-align: middle; margin-right: 8px;" '
        'onerror="this.style.display=\'none\'">'
    )
def format_model_name_with_icon(model_name):
    """Return *model_name* prefixed by its vendor-icon HTML (if any)."""
    return get_model_icon_html(model_name) + model_name
# Load CSV data with caching
def load_csv_data():
    """Load the benchmark CSV into a module-level cache and return it.

    Tries ``panda-bench.csv.gz`` first, then ``panda-bench.csv``; if neither
    is readable, builds a small in-memory dummy frame so the UI still
    renders during development.

    Returns:
        pd.DataFrame: the cached benchmark data (loaded at most once).
    """
    global _cached_df
    if _cached_df is None:
        import gzip
        try:
            # Try to load the compressed file first.  Catching OSError
            # (which covers both FileNotFoundError and gzip.BadGzipFile)
            # means a corrupt archive falls through to the uncompressed
            # file instead of crashing the app at startup.
            with gzip.open("panda-bench.csv.gz", 'rt', encoding='utf-8') as f:
                _cached_df = pd.read_csv(f)
        except OSError:
            try:
                # Fallback to the uncompressed file.
                _cached_df = pd.read_csv("panda-bench.csv")
            except OSError:
                # Last resort: dummy data so the leaderboard is testable
                # without the real dataset present.
                _cached_df = pd.DataFrame({
                    'model_name': ['claude-3-5-sonnet', 'gpt-4o', 'gemini-pro'],
                    'attack_method': ['DEV_MODE_V2', 'DEV_MODE_V2', 'DEV_MODE_V2'],
                    'jailbreak_type': ['Expert advice', 'Economic harm', 'Expert advice'],
                    'GCG': [21.27, 40, 35],
                    'defense_method': ['Paraphrase', 'SelfRemind', 'Paraphrase']
                })
        print(f"CSV loaded with {len(_cached_df)} rows")
    return _cached_df
def get_unique_models():
    """Return the sorted list of model names, computed once and cached."""
    global _cached_models
    if _cached_models is None:
        _cached_models = sorted(load_csv_data()['model_name'].unique())
    return _cached_models
def get_unique_jailbreak_types():
    """Return the sorted list of jailbreak types, computed once and cached."""
    global _cached_jailbreak_types
    if _cached_jailbreak_types is None:
        _cached_jailbreak_types = sorted(load_csv_data()['jailbreak_type'].unique())
    return _cached_jailbreak_types
def get_attack_methods():
    """Return the sorted list of attack methods, computed once and cached."""
    global _cached_attack_methods
    if _cached_attack_methods is None:
        _cached_attack_methods = sorted(load_csv_data()['attack_method'].unique())
    return _cached_attack_methods
def get_evaluation_methods():
    """Return the fixed list of judge/evaluation score columns in the CSV."""
    return [
        "GCG",
        "PAIR_gpt-4o-2024-11-20",
        "PAIR_Qwen_Qwen2.5-72B-Instruct",
        "PAIR_meta-llama_Llama-3.3-70B-Instruct",
    ]
def get_defense_methods():
    """Return the sorted list of defense methods, computed once and cached."""
    global _cached_defense_methods
    if _cached_defense_methods is None:
        _cached_defense_methods = sorted(load_csv_data()['defense_method'].unique())
    return _cached_defense_methods
def format_data(value):
    """Round a numeric score to 2 decimals; return "N/A" for NaN/non-numeric."""
    if not isinstance(value, (int, float)):
        return "N/A"
    if np.isnan(value):
        return "N/A"
    return round(value, 2)
def create_styled_dataframe(headers, data, sort_by_col=None):
    """Build a non-interactive Gradio Dataframe with rank medals and icons.

    When *sort_by_col* names a column, rows are sorted ascending by it
    (lower = better), a "Rank" column with medal emojis for the top three
    is prepended, and a "Model" column is rendered with a vendor icon.
    """
    if not data:
        # Nothing to style: emit an empty table with sane column types.
        return gr.Dataframe(headers=headers, value=data, interactive=False,
                            datatype=["html"] + ["number"] * (len(headers) - 1))

    table = pd.DataFrame(data, columns=headers)

    if sort_by_col and sort_by_col in table.columns:
        # Coerce to numeric so string scores sort correctly; lower is better.
        table[sort_by_col] = pd.to_numeric(table[sort_by_col], errors='coerce')
        table = table.sort_values(by=sort_by_col, ascending=True)
        table = table.reset_index(drop=True)
        # Prepend a Rank column, decorating the top three with medals.
        medals = {0: "🥇 1", 1: "🥈 2", 2: "🥉 3"}
        ranks = [medals.get(pos, str(pos + 1)) for pos in range(len(table))]
        table.insert(0, 'Rank', ranks)
        headers = ['Rank'] + headers

    # The model column sits right after Rank when ranking was applied.
    model_idx = 1 if 'Rank' in headers else 0
    if 'Model' in headers[model_idx]:
        table.iloc[:, model_idx] = table.iloc[:, model_idx].apply(format_model_name_with_icon)

    # Rank and Model columns carry HTML; everything else is numeric.
    if 'Rank' in headers:
        datatypes = ["html", "html"] + ["number"] * (len(headers) - 2)
    else:
        datatypes = ["html"] + ["number"] * (len(headers) - 1)

    return gr.Dataframe(
        headers=headers,
        value=table.values.tolist(),
        interactive=False,
        datatype=datatypes,
        wrap=True,
    )
# Model view: show performance across different attack methods for selected models
def filter_by_model(selected_models, selected_jailbreak_types):
    """Average each selected model's scores per evaluation method.

    Empty selections default to "everything". Returns a styled Gradio
    Dataframe sorted by the overall average (lower = better resistance).
    """
    df = load_csv_data()
    models = selected_models if selected_models else get_unique_models()
    jb_types = (selected_jailbreak_types if selected_jailbreak_types
                else get_unique_jailbreak_types())

    subset = df[df['model_name'].isin(models) & df['jailbreak_type'].isin(jb_types)]
    eval_methods = get_evaluation_methods()

    rows = []
    for model in models:
        per_model = subset[subset['model_name'] == model]
        if per_model.empty:
            continue
        row = [model]
        pooled = []  # every score for this model, across all judges
        for method in eval_methods:
            # Evaluation-method names double as CSV column names.
            if method in per_model.columns:
                scores = pd.to_numeric(per_model[method], errors='coerce').dropna()
            else:
                scores = pd.Series(dtype=float)
            if len(scores) > 0:
                row.append(format_data(scores.mean()))
                pooled.extend(scores.tolist())
            else:
                row.append(format_data(np.nan))
        row.append(format_data(np.mean(pooled) if pooled else np.nan))
        rows.append(row)

    headers = ["Model"] + [f"{method} ⬇️" for method in eval_methods] + ["Overall Avg ⬇️"]
    return create_styled_dataframe(headers, rows, sort_by_col="Overall Avg ⬇️")
# Attack view: show performance of different models for selected attack methods
def filter_by_attack(selected_attacks, selected_evaluation_methods):
    """Build the Attack-view table: one row per model, one column per attack.

    Args:
        selected_attacks: attack-method names to include (empty -> all).
        selected_evaluation_methods: judge column name(s); a bare string is
            wrapped in a list and an empty selection falls back to ["GCG"].

    Returns:
        A styled gr.Dataframe sorted by the per-model "AVG" column
        (lower = better resistance).
    """
    df = load_csv_data()
    if not selected_attacks:
        selected_attacks = get_attack_methods()
    # Normalize the judge selection into a non-empty list.
    if isinstance(selected_evaluation_methods, str):
        selected_evaluation_methods = [selected_evaluation_methods]
    elif not selected_evaluation_methods:
        selected_evaluation_methods = ["GCG"]
    filtered_df = df[df['attack_method'].isin(selected_attacks)]
    models = get_unique_models()
    # Identity mapping from judge name to its CSV column.
    eval_column_map = {
        "GCG": "GCG",
        "PAIR_gpt-4o-2024-11-20": "PAIR_gpt-4o-2024-11-20",
        "PAIR_Qwen_Qwen2.5-72B-Instruct": "PAIR_Qwen_Qwen2.5-72B-Instruct",
        "PAIR_meta-llama_Llama-3.3-70B-Instruct": "PAIR_meta-llama_Llama-3.3-70B-Instruct"
    }
    display_data = []
    for model in models:
        model_data = filtered_df[filtered_df['model_name'] == model]
        if model_data.empty:
            # Model has no rows for the selected attacks: omit it entirely.
            continue
        row = [model]
        all_attack_scores = []  # pooled scores across all attacks for the AVG column
        for attack in selected_attacks:
            attack_data = model_data[model_data['attack_method'] == attack]
            if attack_data.empty:
                row.append(format_data(np.nan))
                continue
            attack_scores = []
            for eval_method in selected_evaluation_methods:
                eval_column = eval_column_map.get(eval_method)
                if eval_column and eval_column in attack_data.columns:
                    scores = pd.to_numeric(attack_data[eval_column], errors='coerce').dropna()
                    attack_scores.extend(scores.tolist())
            avg_score = np.mean(attack_scores) if attack_scores else np.nan
            row.append(format_data(avg_score))
            if attack_scores:
                all_attack_scores.extend(attack_scores)
        overall_avg = np.mean(all_attack_scores) if all_attack_scores else np.nan
        # Insert the overall average just after the model name (column 1).
        if np.isnan(overall_avg):
            row.insert(1, None)
        else:
            row.insert(1, round(overall_avg, 2))
        display_data.append(row)
    eval_method_name = selected_evaluation_methods[0] if selected_evaluation_methods else "N/A"
    headers = [f"Model ({eval_method_name})", "AVG ⬇️"] + [f"{attack} ⬇️" for attack in selected_attacks]
    if display_data:
        # Re-coerce the display values to numeric so sorting works;
        # "N/A" strings become NaN via the empty-string coercion.
        df_result = pd.DataFrame(display_data, columns=headers)
        df_result["AVG ⬇️"] = pd.to_numeric(df_result["AVG ⬇️"], errors='coerce')
        for col in df_result.columns[2:]:
            df_result[col] = pd.to_numeric(df_result[col].astype(str).str.replace('N/A', ''), errors='coerce')
        display_data = df_result.values.tolist()
    return create_styled_dataframe(headers, display_data, sort_by_col="AVG ⬇️")
# Defense view: show performance of different defense methods
def filter_by_defense(selected_defenses, selected_evaluation_methods):
    """Build the Defense-view table: one row per model, one column per defense.

    Args:
        selected_defenses: defense-method names to include (empty -> all).
        selected_evaluation_methods: judge column name(s); a bare string is
            wrapped in a list and an empty selection falls back to ["GCG"].

    Returns:
        A styled gr.Dataframe sorted by the per-model "AVG" column
        (lower = better protection).
    """
    df = load_csv_data()
    if not selected_defenses:
        selected_defenses = get_defense_methods()
    # Normalize the judge selection into a non-empty list.
    if isinstance(selected_evaluation_methods, str):
        selected_evaluation_methods = [selected_evaluation_methods]
    elif not selected_evaluation_methods:
        selected_evaluation_methods = ["GCG"]
    filtered_df = df[df['defense_method'].isin(selected_defenses)]
    models = get_unique_models()
    # Identity mapping from judge name to its CSV column.
    eval_column_map = {
        "GCG": "GCG",
        "PAIR_gpt-4o-2024-11-20": "PAIR_gpt-4o-2024-11-20",
        "PAIR_Qwen_Qwen2.5-72B-Instruct": "PAIR_Qwen_Qwen2.5-72B-Instruct",
        "PAIR_meta-llama_Llama-3.3-70B-Instruct": "PAIR_meta-llama_Llama-3.3-70B-Instruct"
    }
    display_data = []
    for model in models:
        model_data = filtered_df[filtered_df['model_name'] == model]
        if model_data.empty:
            # Model has no rows for the selected defenses: omit it entirely.
            continue
        row = [model]
        all_defense_scores = []  # pooled scores across all defenses for the AVG column
        for defense in selected_defenses:
            defense_data = model_data[model_data['defense_method'] == defense]
            if defense_data.empty:
                row.append(format_data(np.nan))
                continue
            defense_scores = []
            for eval_method in selected_evaluation_methods:
                eval_column = eval_column_map.get(eval_method)
                if eval_column and eval_column in defense_data.columns:
                    scores = pd.to_numeric(defense_data[eval_column], errors='coerce').dropna()
                    defense_scores.extend(scores.tolist())
            avg_score = np.mean(defense_scores) if defense_scores else np.nan
            row.append(format_data(avg_score))
            if defense_scores:
                all_defense_scores.extend(defense_scores)
        overall_avg = np.mean(all_defense_scores) if all_defense_scores else np.nan
        # Insert the overall average just after the model name (column 1).
        if np.isnan(overall_avg):
            row.insert(1, None)
        else:
            row.insert(1, round(overall_avg, 2))
        display_data.append(row)
    eval_method_name = selected_evaluation_methods[0] if selected_evaluation_methods else "N/A"
    headers = [f"Model ({eval_method_name})", "AVG ⬇️"] + [f"{defense} ⬇️" for defense in selected_defenses]
    if display_data:
        # Re-coerce the display values to numeric so sorting works;
        # "N/A" strings become NaN via the empty-string coercion.
        df_result = pd.DataFrame(display_data, columns=headers)
        df_result["AVG ⬇️"] = pd.to_numeric(df_result["AVG ⬇️"], errors='coerce')
        for col in df_result.columns[2:]:
            df_result[col] = pd.to_numeric(df_result[col].astype(str).str.replace('N/A', ''), errors='coerce')
        display_data = df_result.values.tolist()
    return create_styled_dataframe(headers, display_data, sort_by_col="AVG ⬇️")
# Overview: comprehensive statistics
def filter_overview():
    """Aggregate average scores per jailbreak type for every judge column.

    Returns:
        tuple[list, list]: (headers, rows) — one row per jailbreak type,
        ending with the overall average across all judges (lower = better).
    """
    df = load_csv_data()
    methods = get_evaluation_methods()
    rows = []
    for jb_type in get_unique_jailbreak_types():
        chunk = df[df['jailbreak_type'] == jb_type]
        if chunk.empty:
            continue
        row = [jb_type]
        pooled = []  # every score for this type, across all judges
        for method in methods:
            # Judge names double as CSV column names.
            if method in chunk.columns:
                scores = pd.to_numeric(chunk[method], errors='coerce').dropna()
                avg = scores.mean() if len(scores) > 0 else np.nan
                pooled.extend(scores.tolist())
            else:
                avg = np.nan
            row.append(format_data(avg))
        row.append(format_data(np.mean(pooled) if pooled else np.nan))
        rows.append(row)
    headers = ["Jailbreak Type"] + [f"{method} Avg ⬇️" for method in methods] + ["Overall Avg ⬇️"]
    return headers, rows
# Gradio Interface with comprehensive CSS targeting
# Gradio interface. The CSS targets the dataframe's rendered <table> with
# several selector variants so the top-3 row highlighting survives across
# Gradio versions/themes.
with gr.Blocks(title="Jailbreak Attack Results Leaderboard", css="""
/* ็ฌฌไธ€ๅ - ๆต…้‡‘่‰ฒ่ƒŒๆ™ฏ */
table tbody tr:nth-child(1),
table tbody tr:nth-child(1) > *,
.dataframe tbody tr:nth-child(1),
.dataframe tbody tr:nth-child(1) > *,
div[data-testid="dataframe"] table tbody tr:nth-child(1),
div[data-testid="dataframe"] table tbody tr:nth-child(1) > * {
background-color: #FFF9E6 !important;
}
/* ็ฌฌไบŒๅ - ๆต…็ฐ่‰ฒ่ƒŒๆ™ฏ */
table tbody tr:nth-child(2),
table tbody tr:nth-child(2) > *,
.dataframe tbody tr:nth-child(2),
.dataframe tbody tr:nth-child(2) > *,
div[data-testid="dataframe"] table tbody tr:nth-child(2),
div[data-testid="dataframe"] table tbody tr:nth-child(2) > * {
background-color: #F5F5F5 !important;
}
/* ็ฌฌไธ‰ๅ - ๆต…ๆฉ™่‰ฒ่ƒŒๆ™ฏ */
table tbody tr:nth-child(3),
table tbody tr:nth-child(3) > *,
.dataframe tbody tr:nth-child(3),
.dataframe tbody tr:nth-child(3) > *,
div[data-testid="dataframe"] table tbody tr:nth-child(3),
div[data-testid="dataframe"] table tbody tr:nth-child(3) > * {
background-color: #FFF0E6 !important;
}
/* ๆ‚ฌๅœๆ•ˆๆžœ - ๅชๅฏน็ฌฌ4ๅๅŠไปฅๅŽ็”Ÿๆ•ˆ */
table tbody tr:nth-child(n+4):hover,
table tbody tr:nth-child(n+4):hover > *,
.dataframe tbody tr:nth-child(n+4):hover,
.dataframe tbody tr:nth-child(n+4):hover > * {
background-color: #E8F4F8 !important;
}
/* ๅ‰ไธ‰ๅๆ‚ฌๅœๆ—ถไฟๆŒๅŽŸ่ƒŒๆ™ฏ่‰ฒ */
table tbody tr:nth-child(1):hover,
table tbody tr:nth-child(1):hover > *,
.dataframe tbody tr:nth-child(1):hover,
.dataframe tbody tr:nth-child(1):hover > * {
background-color: #FFF9E6 !important;
}
table tbody tr:nth-child(2):hover,
table tbody tr:nth-child(2):hover > *,
.dataframe tbody tr:nth-child(2):hover,
.dataframe tbody tr:nth-child(2):hover > * {
background-color: #F5F5F5 !important;
}
table tbody tr:nth-child(3):hover,
table tbody tr:nth-child(3):hover > *,
.dataframe tbody tr:nth-child(3):hover,
.dataframe tbody tr:nth-child(3):hover > * {
background-color: #FFF0E6 !important;
}
""") as app:
    # Preload the CSV at build time so the first tab render is instant.
    print("Preloading data...")
    load_csv_data()
    print("Data preloaded successfully!")
    gr.Markdown(
        """
# 🛡️ Jailbreak Attack Results Leaderboard
Analyze model performance against different jailbreak attacks across various categories.
Lower scores indicate better resistance to jailbreak attempts.
**🥇 Gold = 1st Place | 🥈 Silver = 2nd Place | 🥉 Bronze = 3rd Place**
"""
    )
    with gr.Tabs():
        # ------------------------------------------------------------------
        # Static information tab.
        # ------------------------------------------------------------------
        with gr.Tab("ℹ️ Information"):
            gr.Markdown(
                """
## 📖 About This Leaderboard
This dashboard displays results from jailbreak attack experiments on various language models.
### 🏆 Ranking System:
- **🥇 1st Place**: Best performing model (lowest score) - Light gold background
- **🥈 2nd Place**: Second best performing model - Light gray background
- **🥉 3rd Place**: Third best performing model - Light orange background
### Usage:
- **Model View**: Compare how different models perform against various evaluation methods
- **Attack View**: Compare how different attacks perform against various models
- **Defense View**: Compare how different defense methods protect against various models
- **Jailbreak Type View**: Get overall statistics across all jailbreak types
### Model Icons:
Official logos from respective companies (mixed CDN strategy for optimal loading)
### Judgement Methods:
- **GCG**: Greedy Coordinate Gradient attack
- **PAIR_gpt-4o**: PAIR attack using GPT-4o
- **PAIR_Qwen**: PAIR attack using Qwen model
- **PAIR_meta-llama**: PAIR attack using Llama model
---
"""
            )
        # ------------------------------------------------------------------
        # Model view: filter by model + jailbreak type.
        # ------------------------------------------------------------------
        with gr.Tab("🤖 Model View"):
            gr.Markdown("### Compare how models perform against various evaluation methods")
            with gr.Row():
                model_select_all = gr.Button("✓ Select All Models", size="sm")
                model_clear_all = gr.Button("✗ Clear All Models", size="sm")
            model_checkbox = gr.CheckboxGroup(
                choices=get_unique_models(),
                label="📋 Select Models",
                value=get_unique_models()
            )
            with gr.Row():
                jailbreak_select_all = gr.Button("✓ Select All Jailbreak Types", size="sm")
                jailbreak_clear_all = gr.Button("✗ Clear All Jailbreak Types", size="sm")
            jailbreak_type_checkbox = gr.CheckboxGroup(
                choices=get_unique_jailbreak_types(),
                label="🎯 Select Jailbreak Types",
                value=get_unique_jailbreak_types()
            )
            model_table = gr.Dataframe(interactive=False)
            def update_model_view(models, jailbreak_types):
                """Refresh the model table from the current checkbox state."""
                return filter_by_model(models, jailbreak_types)
            # Select-all / clear-all button events.
            model_select_all.click(
                fn=lambda: get_unique_models(),
                outputs=model_checkbox
            )
            model_clear_all.click(
                fn=lambda: [],
                outputs=model_checkbox
            )
            jailbreak_select_all.click(
                fn=lambda: get_unique_jailbreak_types(),
                outputs=jailbreak_type_checkbox
            )
            jailbreak_clear_all.click(
                fn=lambda: [],
                outputs=jailbreak_type_checkbox
            )
            # Re-render the table whenever either filter changes.
            for component in [model_checkbox, jailbreak_type_checkbox]:
                component.change(
                    fn=update_model_view,
                    inputs=[model_checkbox, jailbreak_type_checkbox],
                    outputs=model_table
                )
            # Populate the table on initial page load.
            app.load(
                fn=update_model_view,
                inputs=[model_checkbox, jailbreak_type_checkbox],
                outputs=model_table
            )
        # ------------------------------------------------------------------
        # Attack view: filter by attack method + judge.
        # ------------------------------------------------------------------
        with gr.Tab("⚔️ Attack View"):
            gr.Markdown("### Compare attack methods across different models")
            with gr.Row():
                attack_select_all = gr.Button("✓ Select All Attacks", size="sm")
                attack_clear_all = gr.Button("✗ Clear All Attacks", size="sm")
            attack_checkbox = gr.CheckboxGroup(
                choices=get_attack_methods(),
                label="🎯 Select Attack Methods",
                value=get_attack_methods()
            )
            evaluation_method_radio = gr.Radio(
                choices=get_evaluation_methods(),
                label="📊 Select Evaluation Method",
                value="GCG"
            )
            attack_table = gr.Dataframe(interactive=False)
            def update_attack_view(attacks, eval_method):
                """Refresh the attack table; the single radio value becomes a list."""
                return filter_by_attack(attacks, [eval_method])
            # Select-all / clear-all button events.
            attack_select_all.click(
                fn=lambda: get_attack_methods(),
                outputs=attack_checkbox
            )
            attack_clear_all.click(
                fn=lambda: [],
                outputs=attack_checkbox
            )
            # Re-render the table whenever either control changes.
            for component in [attack_checkbox, evaluation_method_radio]:
                component.change(
                    fn=update_attack_view,
                    inputs=[attack_checkbox, evaluation_method_radio],
                    outputs=attack_table
                )
            # Populate the table on initial page load.
            app.load(
                fn=update_attack_view,
                inputs=[attack_checkbox, evaluation_method_radio],
                outputs=attack_table
            )
            # Static heatmap figures, one per judge.
            with gr.Row():
                with gr.Column():
                    gr.Markdown("### 📈 Attack Model Visualization (rule-based GCG judge) ")
                    gr.Image(
                        value="./figs/GCG_attack_model.jpg",
                        interactive=True
                    )
                with gr.Column():
                    gr.Markdown("### 📈 Attack Model Visualization (gpt-4o-based PAIR judge)")
                    gr.Image(
                        value="./figs/attack_model_heatmap.jpg",
                        interactive=True
                    )
                with gr.Column():
                    gr.Markdown("### 📈 Attack Model Visualization (Llama-3.3-70B-based PAIR judge) ")
                    gr.Image(
                        value="./figs/PAIR_llama_attack_model.jpg",
                        interactive=True
                    )
                with gr.Column():
                    gr.Markdown("### 📈 Attack Model Visualization (Qwen2.5-72B-based PAIR judge)")
                    gr.Image(
                        value="./figs/PAIR_qwen_attack_model.jpg",
                        interactive=True
                    )
        # ------------------------------------------------------------------
        # Defense view: filter by defense method + judge.
        # ------------------------------------------------------------------
        with gr.Tab("🛡️ Defense View"):
            gr.Markdown("### Compare defense methods against different attacks")
            with gr.Row():
                defense_select_all = gr.Button("✓ Select All Defenses", size="sm")
                defense_clear_all = gr.Button("✗ Clear All Defenses", size="sm")
            defense_checkbox = gr.CheckboxGroup(
                choices=get_defense_methods(),
                label="🛡️ Select Defense Methods",
                value=get_defense_methods()
            )
            evaluation_method_radio_defense = gr.Radio(
                choices=get_evaluation_methods(),
                label="📊 Select Evaluation Method",
                value="GCG"
            )
            defense_table = gr.Dataframe(interactive=False)
            def update_defense_view(defenses, eval_method):
                """Refresh the defense table; the single radio value becomes a list."""
                return filter_by_defense(defenses, [eval_method])
            # Select-all / clear-all button events.
            defense_select_all.click(
                fn=lambda: get_defense_methods(),
                outputs=defense_checkbox
            )
            defense_clear_all.click(
                fn=lambda: [],
                outputs=defense_checkbox
            )
            # Re-render the table whenever either control changes.
            for component in [defense_checkbox, evaluation_method_radio_defense]:
                component.change(
                    fn=update_defense_view,
                    inputs=[defense_checkbox, evaluation_method_radio_defense],
                    outputs=defense_table
                )
            # Populate the table on initial page load.
            app.load(
                fn=update_defense_view,
                inputs=[defense_checkbox, evaluation_method_radio_defense],
                outputs=defense_table
            )
            # Static heatmap figures, one per judge.
            with gr.Row():
                with gr.Column():
                    gr.Markdown("### 📈 Defense Model Visualization (rule-based GCG judge) ")
                    gr.Image(
                        value="./figs/GCG_defense_model.jpg",
                        interactive=True
                    )
                with gr.Column():
                    gr.Markdown("### 📈 Defense Model Visualization (gpt-4o-based PAIR judge)")
                    gr.Image(
                        value="./figs/defense_model_heatmap.jpg",
                        interactive=True
                    )
                with gr.Column():
                    gr.Markdown("### 📈 Defense Model Visualization (Llama-3.3-70B-based PAIR judge) ")
                    gr.Image(
                        value="./figs/PAIR_llama_defense_model.jpg",
                        interactive=True
                    )
                with gr.Column():
                    gr.Markdown("### 📈 Defense Model Visualization (Qwen2.5-72B-based PAIR judge)")
                    gr.Image(
                        value="./figs/PAIR_qwen_defense_model.jpg",
                        interactive=True
                    )
        # ------------------------------------------------------------------
        # Overview tab: aggregate statistics per jailbreak type.
        # ------------------------------------------------------------------
        with gr.Tab("📊 Jailbreak Type View"):
            gr.Markdown("### Comprehensive statistics across all dimensions")
            overview_table = gr.Dataframe(interactive=False)
            def update_overview():
                """Build the overview table from the aggregated statistics."""
                headers, data = filter_overview()
                return gr.Dataframe(headers=headers, value=data, interactive=False)
            # Populate the table on initial page load.
            app.load(
                fn=update_overview,
                outputs=overview_table
            )
if __name__ == "__main__":
    app.launch(share=False)