LLM-PII-Detection-Leaderboard

Sleeping

LLM-PII-Detection-Leaderboard / pii_leaderboard.py

Luis Kalckstein

New benchmarking results from improved dataset and contribution notebook

ebc9315 unverified 5 months ago

32.6 kB

	import gradio as gr
	import pandas as pd
	import tempfile
	import os
	from data_loader import (
	load_data,
	PII_CATEGORIES,
	HEADER_CONTENT,
	METHODOLOGY,
	COLORS,
	MODEL_TYPES
	)

	def get_rank_badge(rank):
	    """Return the HTML snippet that displays a leaderboard position.

	    Ranks 1, 2 and 3 are drawn as a filled pill labelled "1st", "2nd" and
	    "3rd", colored from the ``COLORS`` palette.  Any other rank is rendered
	    as the bare value in the secondary text color.
	    """
	    # rank -> (pill text, background color, text color)
	    podium = {
	        1: ("1st", COLORS['digital_pollen'], COLORS['warm_black']),
	        2: ("2nd", COLORS['soft_grey'], COLORS['black']),
	        3: ("3rd", COLORS['code_coral'], COLORS['black']),
	    }

	    if rank not in podium:
	        # Outside the top three: plain, unstyled number.
	        return f"""
	<div style="
	display: inline-flex;
	align-items: center;
	justify-content: center;
	min-width: 28px;
	color: var(--text-secondary);
	font-weight: 500;
	">
	{rank}
	</div>
	"""

	    pill_text, pill_bg, pill_fg = podium[rank]
	    return f"""
	<div style="
	display: inline-flex;
	align-items: center;
	justify-content: center;
	min-width: 48px;
	padding: 4px 12px;
	background: {pill_bg};
	color: {pill_fg} !important;
	border-radius: 6px;
	font-weight: 600;
	font-size: 0.9em;
	box-shadow: 0 2px 4px rgba(0, 0, 0, 0.2);
	font-family: 'Archivo', sans-serif;
	">
	{pill_text}
	</div>
	"""

	def get_type_badge(model_type):
	    """Return the HTML pill that labels a model's access type.

	    'Proprietary' gets the ``digital_pollen`` background from ``COLORS``;
	    every other value gets ``data_green``.  The label text is the
	    ``model_type`` value itself.
	    """
	    if model_type == 'Proprietary':
	        pill_bg = COLORS['digital_pollen']
	    else:
	        pill_bg = COLORS['data_green']

	    # Black text is used on both accent backgrounds for readability.
	    pill_fg = '#000000'

	    return f"""
	<div style="
	display: inline-flex;
	align-items: center;
	padding: 4px 8px;
	background: {pill_bg};
	color: {pill_fg} !important;
	border-radius: 4px;
	font-size: 0.85em;
	font-weight: 600;
	font-family: 'Archivo', sans-serif;
	">
	{model_type}
	</div>
	"""

	def get_score_bar(score, is_inverse=False):
	    """Return the HTML for a horizontal score bar followed by the numeric value.

	    Args:
	        score: Metric value; anything ``float()`` accepts.  NaN/None and the
	            empty string are treated as 0.
	        is_inverse: When True the coloring is flipped for "lower is better"
	            metrics (used for the over-detection rate): values above 0.5 are
	            coral, the rest green.  When False values above 0.5 are green.

	    Returns:
	        An HTML string containing the bar and the score formatted to three
	        decimals.
	    """
	    if pd.isna(score) or score == '':
	        score = 0
	    else:
	        score = float(score)

	    # Clamp only the drawn width: a score outside [0, 1] would otherwise emit
	    # a negative or >100% CSS width.  The printed number stays unclamped so
	    # out-of-range data remains visible.
	    width = min(max(score * 100, 0), 100)

	    # The 0.5 threshold picks the color; inverse metrics swap the two colors.
	    above_half = score > 0.5
	    if is_inverse:
	        bar_color = COLORS['code_coral'] if above_half else COLORS['data_green']
	    else:
	        bar_color = COLORS['data_green'] if above_half else COLORS['code_coral']

	    return f"""
	<div style="display: flex; align-items: center; gap: 12px; width: 100%;">
	<div style="
	flex-grow: 1;
	height: 8px;
	background: rgba(239, 235, 231, 0.1);
	border-radius: 4px;
	overflow: hidden;
	max-width: 200px;
	">
	<div style="
	width: {width}%;
	height: 100%;
	background: {bar_color};
	border-radius: 4px;
	transition: width 0.3s ease;
	"></div>
	</div>
	<span style="
	font-family: 'SF Mono', monospace;
	font-weight: 700;
	color: #000000;
	min-width: 60px;
	">{score:.3f}</span>
	</div>
	"""

	def create_pii_leaderboard():
	"""Create the main PII detection leaderboard interface.

	Builds the Gradio components (header, filters, leaderboard table,
	methodology, contribution section, performance card and CSS) and wires
	the change events that refresh the table and the card.  Nothing is
	returned; ``create_app`` calls this inside a ``with gr.Blocks(...)``
	block.
	"""

	def load_leaderboard_data():
	"""Load and prepare the leaderboard data.

	Thin wrapper that returns whatever ``load_data()`` from ``data_loader``
	returns; callers in this function use the result as a pandas DataFrame
	(``.copy()``, ``.columns``, boolean indexing).
	"""
	return load_data()

	def generate_html_table(filtered_df, document_type, sort_by):
	    """Render the leaderboard rows as a styled HTML table.

	    Args:
	        filtered_df: DataFrame that is already filtered and sorted; the row
	            order defines the displayed rank (first row is rank 1).
	        document_type: "All" or a document type name.  For a specific type
	            the accuracy column shows ``'<type> Accuracy'`` and falls back
	            to 'Overall Accuracy' when that column is absent.
	        sort_by: Not used by the renderer; kept so existing callers keep
	            working.

	    Returns:
	        The table as one HTML string.  Missing values ('' or NaN/None) are
	        rendered as "-".
	    """
	    def is_missing(value):
	        # Same "no data" rule as get_score_bar and the performance card:
	        # both the '' placeholder and NaN/None count as missing, so NaN no
	        # longer shows up as a 0.000 bar or as "$nan".
	        return pd.isna(value) or value == ''

	    empty_cell = '<td class="numeric-cell">-</td>'

	    # Collect fragments and join once instead of repeated string +=.
	    parts = ["""
	<div class="v2-table-container">
	<table class="v2-styled-table">
	<thead>
	<tr>
	<th style="width: 80px;">Rank</th>
	<th style="width: 250px;">Model</th>
	<th style="width: 150px;">F1 Score</th>
	<th style="width: 150px;">Recall</th>
	<th style="width: 150px;">Precision</th>
	<th style="width: 200px;">Overall Accuracy</th>
	<th style="width: 160px;">Over-detection Rate</th>
	<th>Cost/Doc ($)</th>
	<th>Time (s)</th>
	<th style="width: 120px;">Type</th>
	<th>Vendor</th>
	</tr>
	</thead>
	<tbody>
	"""]

	    for rank, (_, row) in enumerate(filtered_df.iterrows(), start=1):
	        parts.append(f"""
	<tr>
	<td>{get_rank_badge(rank)}</td>
	<td class="model-name" style="color:#EFEBE7; font-weight:700;">{row['Model']}</td>
	""")

	        # "All" shows overall accuracy; a specific document type shows its
	        # own accuracy column, falling back to the overall value.
	        overall_accuracy = row.get('Overall Accuracy', '')
	        if document_type != "All":
	            accuracy = row.get(f'{document_type} Accuracy', overall_accuracy)
	        else:
	            accuracy = overall_accuracy

	        # Score-bar columns in display order: F1, Recall, Precision,
	        # Accuracy, Over-detection (the last one is "lower is better").
	        score_cells = (
	            (row.get('F1 Score', ''), False),
	            (row.get('Recall', ''), False),
	            (row.get('Precision', ''), False),
	            (accuracy, False),
	            (row.get('Over-redaction Rate', ''), True),
	        )
	        for value, inverse in score_cells:
	            if is_missing(value):
	                parts.append(empty_cell)
	            else:
	                parts.append(
	                    f'<td class="score-cell">{get_score_bar(value, is_inverse=inverse)}</td>'
	                )

	        # Plain numeric columns: cost and processing time.
	        cost = row.get('Cost per Document ($)', '')
	        seconds = row.get('Processing Time (s)', '')
	        cost_display = '-' if is_missing(cost) else f'${float(cost):.3f}'
	        time_display = '-' if is_missing(seconds) else f'{float(seconds):.1f}'

	        parts.append(f"""
	<td class="numeric-cell">{cost_display}</td>
	<td class="numeric-cell">{time_display}</td>
	<td>{get_type_badge(row['Model Type'])}</td>
	<td>{row['Vendor']}</td>
	</tr>
	""")

	    parts.append("""
	</tbody>
	</table>
	</div>
	""")

	    return "".join(parts)

	def filter_and_sort_data(document_type, model_type_filter, sort_by, sort_order):
	    """Filter and sort the leaderboard data, then render it as an HTML table.

	    Args:
	        document_type: "All" or a document type; a specific type keeps only
	            models that have a value in ``'<type> Accuracy'``.
	        model_type_filter: "All", "Open Source" or "Proprietary".
	        sort_by: Column to sort on.  With a specific document type,
	            'Overall Accuracy' is replaced by that type's accuracy column.
	        sort_order: "Ascending" or anything else for descending.  For
	            'Over-redaction Rate' the direction is inverted because lower
	            is better.

	    Returns:
	        The HTML produced by ``generate_html_table`` for the resulting rows.
	    """
	    df = load_leaderboard_data()
	    filtered_df = df.copy()

	    # Document type filtering: only models with data for this type.  Both
	    # the '' placeholder and NaN count as "no data".
	    if document_type != "All":
	        doc_col = f'{document_type} Accuracy'
	        if doc_col in filtered_df.columns:
	            has_data = filtered_df[doc_col].notna() & (filtered_df[doc_col] != '')
	            filtered_df = filtered_df[has_data]

	    # Model type filtering: the two concrete choices match the column values
	    # verbatim; "All" (or anything else) leaves the rows untouched.
	    if model_type_filter in ("Open Source", "Proprietary"):
	        filtered_df = filtered_df[filtered_df['Model Type'] == model_type_filter]

	    # Sorting
	    sort_column = sort_by
	    if document_type != "All" and sort_by == 'Overall Accuracy':
	        sort_column = f'{document_type} Accuracy'

	    if sort_column in filtered_df.columns:
	        ascending = (sort_order == "Ascending")
	        # For over-detection rate, flip the logic (lower is better)
	        if sort_by == "Over-redaction Rate":
	            ascending = not ascending
	        # Sort on a numeric view of the column: the '' placeholder mixed with
	        # floats cannot be compared directly, so non-numeric entries are
	        # coerced to NaN and placed last.
	        filtered_df = filtered_df.sort_values(
	            by=sort_column,
	            ascending=ascending,
	            na_position='last',
	            key=lambda column: pd.to_numeric(column, errors='coerce'),
	        )

	    return generate_html_table(filtered_df, document_type, sort_by)

	def generate_performance_card(model_name):
	    """Generate the HTML performance card for one model.

	    Args:
	        model_name: Value of the 'Model' column to look up.  A falsy value
	            yields a "please select a model" placeholder.

	    Returns:
	        An HTML string: the card with star rating, overall rank, headline
	        metrics and per-document-type accuracy, or a placeholder message
	        when no model is selected or the model is not in the data.
	    """
	    if not model_name:
	        return """<div style="text-align: center; color: var(--text-secondary); padding: 40px;">
	Please select a model to generate its performance card
	</div>"""

	    df = load_leaderboard_data()
	    model_data = df[df['Model'] == model_name]

	    if model_data.empty:
	        return """<div style="text-align: center; color: var(--text-secondary); padding: 40px;">
	Model not found in the database
	</div>"""

	    row = model_data.iloc[0]

	    # Overall rank: position by descending 'Overall Accuracy' among models
	    # that have a value in that column.
	    df_with_accuracy = df[df['Overall Accuracy'] != ''].copy()
	    df_with_accuracy['Overall Accuracy'] = pd.to_numeric(df_with_accuracy['Overall Accuracy'], errors='coerce')
	    df_sorted = df_with_accuracy.sort_values('Overall Accuracy', ascending=False).reset_index(drop=True)
	    # Explicit emptiness check instead of a bare ``except:`` so unrelated
	    # errors are no longer silently turned into 'N/A'.
	    positions = df_sorted.index[df_sorted['Model'] == model_name]
	    rank = positions[0] + 1 if len(positions) > 0 else 'N/A'

	    def format_value(val, decimals=3, prefix='', suffix=''):
	        """Format a metric with fixed decimals, or 'N/A' when missing."""
	        if pd.isna(val) or val == '':
	            return 'N/A'
	        return f"{prefix}{float(val):.{decimals}f}{suffix}"

	    # Determine model type icon
	    # NOTE(review): type_icon is computed but never used in the card below.
	    type_icon = "🔓" if row['Model Type'] == 'Open Source' else "🔒"

	    def get_performance_stars(value, max_val=1.0):
	        """Map value/max_val to 1-5 stars; a missing value gives no stars."""
	        if pd.isna(value) or value == '':
	            return ''
	        score = float(value) / max_val
	        # (lower bound, star count), checked from best to worst.
	        for lower_bound, star_count in ((0.9, 5), (0.8, 4), (0.7, 3), (0.6, 2)):
	            if score >= lower_bound:
	                return '⭐' * star_count
	        return '⭐'

	    # Card header and headline metrics.
	    card_html = f"""
	<div class="performance-card">
	<div class="card-header">
	<h1 class="card-model-name">{model_name}</h1>
	<div class="card-stars">
	{get_performance_stars(row['Overall Accuracy'])}
	</div>
	</div>

	<div class="metrics-grid" style="margin-bottom: 24px;">
	<div class="metric-item">
	<div class="metric-icon" style="color: var(--accent-primary);">🏆</div>
	<div class="metric-label">Overall Rank</div>
	<div class="metric-value">#{rank}</div>
	</div>

	<div class="metric-item">
	<div class="metric-icon" style="color: var(--accent-primary);">🎯</div>
	<div class="metric-label">Overall Accuracy</div>
	<div class="metric-value">{format_value(row['Overall Accuracy'])}</div>
	</div>

	<div class="metric-item">
	<div class="metric-icon" style="color: var(--accent-secondary);">📊</div>
	<div class="metric-label">Precision</div>
	<div class="metric-value">{format_value(row['Precision'])}</div>
	</div>

	<div class="metric-item">
	<div class="metric-icon" style="color: var(--accent-tertiary);">🔍</div>
	<div class="metric-label">Recall</div>
	<div class="metric-value">{format_value(row['Recall'])}</div>
	</div>

	<div class="metric-item">
	<div class="metric-icon" style="color: var(--accent-quaternary);">💰</div>
	<div class="metric-label">Cost/Doc</div>
	<div class="metric-value">{format_value(row['Cost per Document ($)'], 3, '$')}</div>
	</div>

	<div class="metric-item">
	<div class="metric-icon" style="color: var(--text-primary);">⚡</div>
	<div class="metric-label">Processing Time</div>
	<div class="metric-value">{format_value(row['Processing Time (s)'], 1, '', 's')}</div>
	</div>
	</div>

	<div class="domains-section" style="margin-top: 24px;">
	<h3 class="domains-title">📄 Document Type Performance</h3>
	<div class="domains-grid">
	"""

	    # Per-document-type accuracy tiles: (icon, name used in '<name> Accuracy').
	    doc_types = [
	        ('🏥', 'Healthcare'),
	        ('💰', 'Financial'),
	        ('🏛️', 'Government'),
	        ('⚖️', 'Legal'),
	        ('👤', 'Personal')
	    ]

	    for doc_icon, doc_type in doc_types:
	        accuracy_value = row.get(f'{doc_type} Accuracy', '')

	        if accuracy_value != '' and not pd.isna(accuracy_value):
	            score_display = f"{float(accuracy_value):.3f}"
	            score_color = "var(--accent-primary)"
	        else:
	            # No data for this document type: muted "N/A".
	            score_display = "N/A"
	            score_color = "var(--text-muted)"

	        card_html += f"""
	<div class="domain-item">
	<div class="domain-name">{doc_icon}</div>
	<div style="font-size: 0.7rem; color: var(--text-secondary); margin-bottom: 2px;">{doc_type}</div>
	<div class="domain-score" style="color: {score_color};">{score_display}</div>
	</div>
	"""

	    # Close the grid/section and add the footer (no placeholders needed).
	    card_html += """
	</div>
	</div>

	<div class="card-footer">
	<div class="card-url">
	<strong>LLM PII Detection Leaderboard</strong>
	</div>
	</div>
	</div>
	"""

	    return card_html

	# Load initial data once at build time: the DataFrame feeds the model
	# dropdown and the initial performance card further down, and the
	# pre-rendered table (all document types, all model types, sorted by
	# F1 Score descending) is the leaderboard's initial content.
	initial_df = load_leaderboard_data()
	initial_table = filter_and_sort_data("All", "All", "F1 Score", "Descending")

	# Display header
	gr.HTML(HEADER_CONTENT)

	# Main leaderboard section with all filters.
	# NOTE(review): this fragment leaves <div> elements open that are only
	# closed by later gr.HTML calls; whether the browser ends up nesting the
	# Gradio components inside them is not visible from this file - confirm.
	gr.HTML("""
	<div class="dark-container" style="margin-bottom: 32px;">
	<div class="section-header">
	<span class="section-icon" style="color: var(--accent-primary);">📈</span>
	<h3 style="margin: 0; color: var(--text-primary); font-size: 1.5rem; font-family: 'Archivo', sans-serif; font-weight: 700;">
	PII Detection Performance Leaderboard
	</h3>
	</div>

	<!-- Dataset Reference -->
	<div style="background: var(--bg-secondary); border: 1px solid var(--border-subtle); border-radius: 12px; padding: 16px; margin: 16px 0 24px 0;">
	<p style="color: var(--text-primary); margin: 0 0 8px 0; font-size: 1rem; font-family: 'Archivo', sans-serif; font-weight: 600;">
	📊 <strong>Dataset</strong>: <a href="https://huggingface.co/datasets/nutrientdocs/DocPII-redaction-benchmark" style="color: var(--accent-primary); text-decoration: none;" target="_blank">DocPII: Contextual Redaction Benchmark Dataset</a>
	</p>
	<p style="color: var(--text-secondary); margin: 0; font-size: 0.95rem; font-family: 'Archivo', sans-serif; line-height: 1.4;">
	DocPII contains 1,101 high-quality document samples with embedded PII, designed to evaluate context-aware redaction systems. It provides realistic, full-document contexts across healthcare, finance, and other sectors—a notable advancement over sentence-level datasets.
	</p>
	</div>

	<p style="color: var(--text-secondary); margin-bottom: 20px; font-size: 1.1rem; font-family: 'Archivo', sans-serif;">
	Filter by document type, model access, and sort by any metric to explore performance
	</p>

	<!-- Document Type Filter -->
	<div style="margin-bottom: 24px;">
	<h4 style="color: var(--text-primary); margin-bottom: 12px; font-size: 1rem;">📄 Document Type</h4>
	""")

	# Document type selector; the non-"All" choices are the names used to
	# build the '<type> Accuracy' column names in filter_and_sort_data.
	document_type_filter = gr.Radio(
	choices=["All", "Healthcare", "Financial", "Government", "Legal", "Personal"],
	value="All",
	label="",
	interactive=True,
	elem_classes=["document-type-radio"]
	)

	gr.HTML("""
	</div>

	<!-- Other Filters -->
	<div style="margin-bottom: 24px;">
	<h4 style="color: var(--text-primary); margin-bottom: 12px; font-size: 1rem;">🔍 Filters & Sorting</h4>
	</div>
	""")

	# One row with three equally weighted columns: model access, sort column,
	# sort direction.
	with gr.Row():
	with gr.Column(scale=1):
	model_type_filter = gr.Radio(
	choices=["All", "Open Source", "Proprietary"],
	value="All",
	label="🔓 Model Access",
	elem_classes=["compact-radio"]
	)

	with gr.Column(scale=1):
	# The choices are used directly as DataFrame column names when sorting.
	sort_by = gr.Dropdown(
	choices=["F1 Score", "Recall", "Precision", "Overall Accuracy", "Over-redaction Rate", "Cost per Document ($)", "Processing Time (s)"],
	value="F1 Score",
	label="📊 Sort By",
	elem_classes=["dropdown"]
	)

	with gr.Column(scale=1):
	sort_order = gr.Radio(
	choices=["Descending", "Ascending"],
	value="Descending",
	label="🔄 Sort Order",
	elem_classes=["compact-radio"]
	)

	gr.HTML("""
	<!-- Leaderboard Table -->
	<div style="margin-top: 24px;">
	<div class="dataframe-container">
	""")

	# The table itself; replaced by update_table whenever a filter changes.
	leaderboard_table = gr.HTML(initial_table)

	gr.HTML("""
	</div>
	</div>
	</div>""")

	# Methodology section: the METHODOLOGY HTML from data_loader wrapped in
	# the shared "dark-container" card.
	gr.HTML(f"""
	<div class="dark-container" style="margin-top: 32px;">
	{METHODOLOGY}
	</div>
	""")

	# Contribution section: static call-to-action with a link to the example
	# Colab notebook and the three submission steps.
	# NOTE(review): the notebook link's inline style ends with
	# "hover: background: ..."; inline style attributes presumably cannot
	# express a hover state, so that declaration likely has no effect - confirm.
	gr.HTML("""
	<div class="dark-container" style="margin-top: 32px;">
	<div class="section-header">
	<h3 style="margin: 0; color: var(--text-primary); font-size: 1.5rem; font-family: 'Archivo', sans-serif; font-weight: 700;">
	Contribute to the Leaderboard
	</h3>
	</div>

	<div style="background: var(--bg-secondary); border: 1px solid var(--border-subtle); border-radius: 16px; padding: 24px; margin-bottom: 24px;">
	<div style="display: grid; grid-template-columns: 1fr 1fr; gap: 2rem; align-items: center;">
	<div>
	<h4 style="color: var(--accent-primary); margin: 0 0 16px 0; font-size: 1.2rem; font-family: 'Archivo', sans-serif; font-weight: 600;">
	Help Improve PII Detection
	</h4>
	<p style="color: var(--text-primary); margin: 0 0 16px 0; font-size: 1rem; font-family: 'Archivo', sans-serif; line-height: 1.6;">
	Join our community and contribute to advancing PII detection capabilities! We encourage researchers and developers to:
	</p>
	<ul style="color: var(--text-secondary); font-size: 0.95rem; font-family: 'Archivo', sans-serif; line-height: 1.5; margin: 0; padding-left: 20px;">
	<li style="margin-bottom: 8px;"><strong>Optimize prompts</strong> with existing models for better performance</li>
	<li style="margin-bottom: 8px;"><strong>Test your own models</strong> on the DocPII benchmark dataset</li>
	<li style="margin-bottom: 8px;"><strong>Share novel approaches</strong> and techniques for PII detection</li>
	<li style="margin-bottom: 8px;"><strong>Experiment with fine-tuning</strong> strategies for document-level context</li>
	</ul>
	</div>

	<div style="text-align: center;">
	<div style="background: var(--bg-secondary); border: 1px solid var(--border-subtle); border-radius: 8px; padding: 16px; margin-bottom: 16px;">
	<h4 style="color: var(--text-primary); margin: 0 0 8px 0; font-size: 1rem; font-family: 'Archivo', sans-serif; font-weight: 600;">
	Example Notebook
	</h4>
	<p style="color: var(--text-secondary); margin: 0; font-size: 0.85rem; font-family: 'Archivo', sans-serif;">
	Ready-to-run evaluation setup
	</p>
	</div>
	<a href="https://colab.research.google.com/drive/1Qs5b85jWzmpFhVO-2mo0BgECCxKAeQIP?usp=sharing"
	target="_blank"
	rel="noopener noreferrer"
	style="display: inline-block; background: var(--bg-secondary); color: var(--text-primary); border: 1px solid var(--accent-primary); padding: 10px 20px; border-radius: 6px; text-decoration: none; font-family: 'Archivo', sans-serif; font-weight: 500; font-size: 0.9rem; transition: all 0.3s ease; hover: background: var(--accent-primary);">
	Open in Google Colab
	</a>
	</div>
	</div>
	</div>

	<div style="background: linear-gradient(135deg, rgba(240, 201, 104, 0.1), rgba(239, 235, 231, 0.1)); border: 1px solid var(--accent-primary); border-radius: 16px; padding: 20px; text-align: center;">
	<h4 style="color: var(--accent-primary); margin: 0 0 12px 0; font-size: 1.1rem; font-family: 'Archivo', sans-serif; font-weight: 600;">
	How to Submit Your Results
	</h4>
	<p style="color: var(--text-primary); margin: 0 0 16px 0; font-size: 1rem; font-family: 'Archivo', sans-serif; line-height: 1.5;">
	Share your findings with the community! Submit your results along with a Google Colab notebook demonstrating your approach.
	</p>
	<div style="display: flex; justify-content: center; gap: 16px; flex-wrap: wrap;">
	<div style="background: var(--bg-secondary); border: 1px solid var(--border-subtle); border-radius: 8px; padding: 12px 16px; font-size: 0.9rem; font-family: 'Archivo', sans-serif;">
	<span style="color: var(--accent-primary); font-weight: 600;">1.</span>
	<span style="color: var(--text-secondary);"> Run evaluation</span>
	</div>
	<div style="background: var(--bg-secondary); border: 1px solid var(--border-subtle); border-radius: 8px; padding: 12px 16px; font-size: 0.9rem; font-family: 'Archivo', sans-serif;">
	<span style="color: var(--accent-primary); font-weight: 600;">2.</span>
	<span style="color: var(--text-secondary);"> Create Colab notebook</span>
	</div>
	<div style="background: var(--bg-secondary); border: 1px solid var(--border-subtle); border-radius: 8px; padding: 12px 16px; font-size: 0.9rem; font-family: 'Archivo', sans-serif;">
	<span style="color: var(--accent-primary); font-weight: 600;">3.</span>
	<span style="color: var(--text-secondary);"> Add Discussion in Community</span>
	</div>
	</div>
	</div>
	</div>
	""")

	# Performance Card Section: heading and intro text for the per-model card.
	gr.HTML("""
	<div class="dark-container" style="margin-top: 32px;">
	<div class="section-header">
	<span class="section-icon" style="color: var(--accent-primary);">🎯</span>
	<h3 style="margin: 0; color: var(--text-primary); font-size: 1.5rem; font-family: 'Archivo', sans-serif; font-weight: 700;">
	Model Performance Cards
	</h3>
	</div>
	<p style="color: var(--text-secondary); margin-bottom: 20px; font-size: 1.1rem; font-family: 'Archivo', sans-serif; text-align: center;">
	Dive deep into individual model performance across all metrics and document types
	</p>

	""")

	# Model picker, populated from the data loaded at build time; the first
	# model is preselected, or nothing when the data is empty.
	card_model_selector = gr.Dropdown(
	choices=initial_df['Model'].tolist(),
	value=initial_df['Model'].tolist()[0] if len(initial_df) > 0 else None,
	label="🤖 Select Model",
	info="Choose a model to view its performance card",
	elem_classes=["dropdown"]
	)

	gr.HTML("""
	</div>
	</div>

	<div style="width: 100%;">
	""")

	# Card display area, pre-filled with the card of the preselected model
	# (same first-model rule as the dropdown above; empty string when there
	# is no data).
	initial_model = initial_df['Model'].tolist()[0] if len(initial_df) > 0 else None
	initial_card_html = generate_performance_card(initial_model) if initial_model else ""
	card_display = gr.HTML(value=initial_card_html, elem_id="performance-card-html")

	# Closing markup plus the "Powered by" footer link.
	gr.HTML("""
	</div>

	<div style="text-align: center; margin-top: 24px; padding-top: 12px; border-top: 1px solid var(--border-subtle);">
	<span style="color: var(--text-secondary); font-family: 'Archivo', sans-serif;">Powered by <a href=\"https://nutrient.io\" target=\"_blank\" rel=\"noopener noreferrer\" style=\"color: var(--accent-primary); font-weight: 700; text-decoration: none;\">Nutrient</a></span>
	</div>
	</div>
	</div>""")

	# Add performance card CSS, injected as a <style> element through gr.HTML.
	# It styles the classes emitted by generate_performance_card and the
	# "document-type-radio" / "compact-radio" element classes set on the
	# filter widgets.  The literal is an f-string without placeholders, so
	# every doubled brace below renders as a single CSS brace.
	gr.HTML(f"""
	<style>
	.performance-card {{
	background: var(--bg-card);
	border: 2px solid var(--accent-primary);
	border-radius: 24px;
	padding: 32px;
	max-width: 700px;
	margin: 0 auto;
	position: relative;
	overflow: hidden;
	box-shadow:
	0 20px 40px rgba(0, 0, 0, 0.5),
	0 0 80px rgba(240, 201, 104, 0.2),
	inset 0 0 120px rgba(240, 201, 104, 0.08);
	}}

	.card-header {{
	text-align: center;
	margin-bottom: 24px;
	position: relative;
	z-index: 1;
	}}

	.card-model-name {{
	font-size: 2rem;
	font-weight: 800;
	color: var(--text-primary);
	margin-bottom: 8px;
	line-height: 1.2;
	font-family: 'Archivo', sans-serif;
	}}

	.card-stars {{
	font-size: 1.2rem;
	margin: 8px 0;
	}}

	.metrics-grid {{
	display: grid;
	grid-template-columns: repeat(2, 1fr);
	gap: 16px;
	margin: 24px 0;
	}}

	.metric-item {{
	display: flex;
	flex-direction: column;
	align-items: center;
	padding: 16px;
	background: rgba(239, 235, 231, 0.05);
	border-radius: 12px;
	border: 1px solid var(--border-subtle);
	transition: all 0.3s ease;
	}}

	.metric-item:hover {{
	transform: translateY(-2px);
	border-color: var(--accent-primary);
	box-shadow: 0 8px 16px rgba(240, 201, 104, 0.25);
	}}

	.metric-icon {{
	font-size: 1.5rem;
	margin-bottom: 8px;
	}}

	.metric-label {{
	font-size: 0.85rem;
	color: var(--text-secondary);
	margin-bottom: 4px;
	text-align: center;
	}}

	.metric-value {{
	font-size: 1.1rem;
	font-weight: 700;
	color: var(--text-primary);
	text-align: center;
	}}

	.domains-section {{
	margin-top: 24px;
	}}

	.domains-title {{
	color: var(--text-primary);
	font-size: 1.2rem;
	margin-bottom: 16px;
	text-align: center;
	}}

	.domains-grid {{
	display: grid;
	grid-template-columns: repeat(5, 1fr);
	gap: 12px;
	}}

	.domain-item {{
	display: flex;
	flex-direction: column;
	align-items: center;
	padding: 12px;
	background: rgba(239, 235, 231, 0.03);
	border-radius: 8px;
	border: 1px solid var(--border-subtle);
	transition: all 0.3s ease;
	}}

	.domain-item:hover {{
	border-color: var(--accent-primary);
	transform: scale(1.02);
	}}

	.domain-name {{
	font-size: 1.2rem;
	margin-bottom: 4px;
	}}

	.domain-score {{
	font-size: 0.9rem;
	font-weight: 600;
	}}

	.card-footer {{
	text-align: center;
	margin-top: 24px;
	padding-top: 16px;
	border-top: 1px solid var(--border-subtle);
	}}

	.card-url {{
	color: var(--text-secondary);
	font-size: 0.9rem;
	}}

	/* Additional styling for radio buttons and specific components */
	.document-type-radio .wrap {{
	display: flex !important;
	gap: 12px !important;
	flex-wrap: wrap !important;
	justify-content: center !important;
	}}

	.document-type-radio .wrap > label {{
	flex: 1 !important;
	min-width: 140px !important;
	max-width: 180px !important;
	padding: 12px 16px !important;
	background: var(--bg-card) !important;
	border: 2px solid var(--border-default) !important;
	border-radius: 12px !important;
	cursor: pointer !important;
	transition: all 0.3s ease !important;
	text-align: center !important;
	font-weight: 500 !important;
	}}

	.document-type-radio .wrap > label:hover {{
	border-color: var(--accent-primary) !important;
	transform: translateY(-2px) !important;
	}}

	.document-type-radio .wrap > label:has(input[type="radio"]:checked) {{
	background: transparent !important;
	border-color: var(--accent-primary) !important;
	color: var(--text-primary) !important;
	font-weight: 600 !important;
	box-shadow: 0 8px 16px var(--glow-primary) !important;
	}}

	.document-type-radio input[type="radio"] {{
	display: none !important;
	}}

	.compact-radio .wrap > label {{
	padding: 8px 12px !important;
	font-size: 0.85rem !important;
	min-width: auto !important;
	max-width: 120px !important;
	}}
	</style>
	""")

	# Update functions: thin callbacks handed to Gradio's change events.
	def update_table(*args):
	"""Re-render the leaderboard table.

	``args`` are the current values of the four filter widgets, in the
	order of ``filter_inputs`` below (document type, model type, sort
	column, sort order), and are forwarded unchanged.
	"""
	return filter_and_sort_data(*args)

	def update_card(model_name):
	"""Re-render the performance card for the selected model."""
	return generate_performance_card(model_name)

	# Connect update functions to components.  The list order must match the
	# positional parameters of filter_and_sort_data.
	filter_inputs = [document_type_filter, model_type_filter, sort_by, sort_order]

	# A change on any one filter re-renders the table from the values of all
	# four filters.
	for input_component in filter_inputs:
	input_component.change(
	fn=update_table,
	inputs=filter_inputs,
	outputs=[leaderboard_table]
	)

	# Update card when model selection changes
	card_model_selector.change(
	fn=update_card,
	inputs=[card_model_selector],
	outputs=[card_display]
	)

	def create_app():
	    """Build the Gradio Blocks application that hosts the leaderboard.

	    Returns:
	        The ``gr.Blocks`` instance, created with the default theme, on which
	        ``create_pii_leaderboard`` has built its components.
	    """
	    app = gr.Blocks(
	        theme=gr.themes.Default(),
	        title="🔒 LLM PII Detection Leaderboard",
	    )
	    # The leaderboard is built while the Blocks context is active.
	    with app:
	        create_pii_leaderboard()
	    return app

	# Script entry point: build the app and start the Gradio server with its
	# default launch settings.  Nothing runs when the module is imported.
	if __name__ == "__main__":
	demo = create_app()
	demo.launch()