# LLM-PII-Detection-Leaderboard / pii_leaderboard.py
# Luis Kalckstein
# New benchmarking results from improved dataset and contribution notebook
# ebc9315 unverified
import gradio as gr
import pandas as pd
import tempfile
import os
from data_loader import (
load_data,
PII_CATEGORIES,
HEADER_CONTENT,
METHODOLOGY,
COLORS,
MODEL_TYPES
)
def get_rank_badge(rank):
    """Return the HTML snippet shown in the leaderboard's rank column.

    Ranks 1, 2 and 3 get a coloured "1st"/"2nd"/"3rd" pill whose colours
    come from ``COLORS``; every other rank is rendered as the plain value
    in the secondary text colour.
    """
    # (label, background, text colour) for the three podium positions.
    podium = {
        1: ("1st", COLORS['digital_pollen'], COLORS['warm_black']),
        2: ("2nd", COLORS['soft_grey'], COLORS['black']),
        3: ("3rd", COLORS['code_coral'], COLORS['black']),
    }
    style = podium.get(rank)

    if style is None:
        # Not a podium position: show the bare rank.
        return f"""
<div style="
display: inline-flex;
align-items: center;
justify-content: center;
min-width: 28px;
color: var(--text-secondary);
font-weight: 500;
">
{rank}
</div>
"""

    text, background, foreground = style
    return f"""
<div style="
display: inline-flex;
align-items: center;
justify-content: center;
min-width: 48px;
padding: 4px 12px;
background: {background};
color: {foreground} !important;
border-radius: 6px;
font-weight: 600;
font-size: 0.9em;
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.2);
font-family: 'Archivo', sans-serif;
">
{text}
</div>
"""
def get_type_badge(model_type):
    """Return the HTML pill that labels a model's type.

    'Proprietary' uses the ``digital_pollen`` colour from ``COLORS``; any
    other value uses ``data_green``. The label text is ``model_type``
    itself.
    """
    if model_type == 'Proprietary':
        background = COLORS['digital_pollen']
    else:
        background = COLORS['data_green']
    # Black text is used on both accent backgrounds for readability.
    foreground = '#000000'

    return f"""
<div style="
display: inline-flex;
align-items: center;
padding: 4px 8px;
background: {background};
color: {foreground} !important;
border-radius: 4px;
font-size: 0.85em;
font-weight: 600;
font-family: 'Archivo', sans-serif;
">
{model_type}
</div>
"""
def get_score_bar(score, is_inverse=False):
    """Generate HTML for a horizontal score bar followed by the numeric score.

    Args:
        score: Metric value; the bar is drawn at ``score * 100`` percent, so
            a 0-1 scale is expected. NaN/None and the empty string are
            rendered as 0. Anything else is passed through ``float()``.
        is_inverse: When True the colouring is inverted (used for the
            over-detection rate, where lower is better): values above 0.5
            are coral and the rest green.

    Returns:
        An HTML string containing the bar and the score formatted with three
        decimals.

    Raises:
        ValueError: If ``score`` is a non-empty string that ``float()``
            cannot parse.
    """
    if pd.isna(score) or score == '':
        score = 0
    else:
        score = float(score)

    # Clamp only the drawn width: a negative width is invalid CSS and a width
    # above 100% is meaningless for a progress bar. The printed number below
    # still shows the raw score.
    width = min(max(score * 100, 0), 100)

    # For over-detection rate, use inverse coloring (lower is better)
    if is_inverse:
        bar_color = COLORS['code_coral'] if score > 0.5 else COLORS['data_green']
    else:
        bar_color = COLORS['data_green'] if score > 0.5 else COLORS['code_coral']

    return f"""
<div style="display: flex; align-items: center; gap: 12px; width: 100%;">
<div style="
flex-grow: 1;
height: 8px;
background: rgba(239, 235, 231, 0.1);
border-radius: 4px;
overflow: hidden;
max-width: 200px;
">
<div style="
width: {width}%;
height: 100%;
background: {bar_color};
border-radius: 4px;
transition: width 0.3s ease;
"></div>
</div>
<span style="
font-family: 'SF Mono', monospace;
font-weight: 700;
color: #000000;
min-width: 60px;
">{score:.3f}</span>
</div>
"""
def create_pii_leaderboard():
    """Create the main PII detection leaderboard interface.

    Instantiates all Gradio components (header, filter controls, the
    leaderboard table, methodology and contribution sections, the model
    performance card and its CSS) and wires the change events. Nothing is
    returned; the components are created as a side effect, so this is meant
    to be called inside a ``gr.Blocks`` context, as ``create_app`` does.

    Throughout this function the empty string is treated as a
    "missing value" marker in the data returned by ``load_data()``, next to
    NaN.
    """

    def _is_missing(value):
        """Return True for NaN/None or the empty-string placeholder."""
        # pd.isna is checked first so that NA-like values never reach the
        # string comparison.
        return pd.isna(value) or value == ''

    def load_leaderboard_data():
        """Load and prepare the leaderboard data"""
        return load_data()

    def generate_html_table(filtered_df, document_type, sort_by):
        """Generate styled HTML table with rank badges and score bars.

        Args:
            filtered_df: Already filtered and sorted DataFrame; rows are
                ranked 1..n in their current order.
            document_type: "All" or a document type name. For a specific
                type the accuracy column shows '<type> Accuracy', falling
                back to 'Overall Accuracy' when that column is absent.
            sort_by: Accepted for interface compatibility; not used here.

        Returns:
            The complete table as an HTML string. Missing values (empty
            string or NaN) are rendered as "-".
        """

        def _score_cell(value, is_inverse=False):
            """Return a <td> holding a score bar, or "-" for a missing value."""
            if _is_missing(value):
                return '<td class="numeric-cell">-</td>'
            return f'<td class="score-cell">{get_score_bar(value, is_inverse=is_inverse)}</td>'

        # Fragments are collected in a list and joined once at the end.
        parts = ["""
<div class="v2-table-container">
<table class="v2-styled-table">
<thead>
<tr>
<th style="width: 80px;">Rank</th>
<th style="width: 250px;">Model</th>
<th style="width: 150px;">F1 Score</th>
<th style="width: 150px;">Recall</th>
<th style="width: 150px;">Precision</th>
<th style="width: 200px;">Overall Accuracy</th>
<th style="width: 160px;">Over-detection Rate</th>
<th>Cost/Doc ($)</th>
<th>Time (s)</th>
<th style="width: 120px;">Type</th>
<th>Vendor</th>
</tr>
</thead>
<tbody>
"""]

        # Generate table rows
        for rank, (_, row) in enumerate(filtered_df.iterrows(), start=1):
            parts.append(f"""
<tr>
<td>{get_rank_badge(rank)}</td>
<td class="model-name" style="color:#EFEBE7; font-weight:700;">{row['Model']}</td>
""")

            # Get appropriate values based on document type filter
            if document_type != "All":
                # For specific document type, show domain-specific scores
                accuracy_col = f'{document_type} Accuracy'
                accuracy = row.get(accuracy_col, row.get('Overall Accuracy', ''))
            else:
                # For "All", show overall accuracy
                accuracy = row.get('Overall Accuracy', '')

            precision = row.get('Precision', '')
            recall = row.get('Recall', '')
            f1 = row.get('F1 Score', '')
            over_detection = row.get('Over-redaction Rate', '')
            cost = row.get('Cost per Document ($)', '')
            proc_time = row.get('Processing Time (s)', '')

            # Score bars in column order: F1, Recall, Precision, Accuracy,
            # then over-detection with inverse colouring (lower is better).
            parts.append(_score_cell(f1))
            parts.append(_score_cell(recall))
            parts.append(_score_cell(precision))
            parts.append(_score_cell(accuracy))
            parts.append(_score_cell(over_detection, is_inverse=True))

            # Format cost and time
            cost_display = '-' if _is_missing(cost) else f'${float(cost):.3f}'
            time_display = '-' if _is_missing(proc_time) else f'{float(proc_time):.1f}'

            parts.append(f"""
<td class="numeric-cell">{cost_display}</td>
<td class="numeric-cell">{time_display}</td>
<td>{get_type_badge(row['Model Type'])}</td>
<td>{row['Vendor']}</td>
</tr>
""")

        parts.append("""
</tbody>
</table>
</div>
""")
        return ''.join(parts)

    def filter_and_sort_data(document_type, model_type_filter, sort_by, sort_order):
        """Filter and sort the leaderboard data and render it as HTML.

        Args:
            document_type: "All" or a document type; a specific type keeps
                only rows that have a value in '<type> Accuracy' (when that
                column exists).
            model_type_filter: "All", "Open Source" or "Proprietary".
            sort_by: Column to sort by. With a specific document type,
                'Overall Accuracy' is replaced by '<type> Accuracy'.
            sort_order: "Ascending" or anything else for descending. The
                direction is inverted for "Over-redaction Rate" because
                lower is better there.

        Returns:
            The HTML produced by ``generate_html_table``.
        """
        df = load_leaderboard_data()
        filtered_df = df.copy()

        # Document type filtering
        if document_type != "All":
            # Only show models that have data for this document type
            doc_col = f'{document_type} Accuracy'
            if doc_col in filtered_df.columns:
                has_value = filtered_df[doc_col].notna() & (filtered_df[doc_col] != '')
                filtered_df = filtered_df[has_value]

        # Model type filtering
        if model_type_filter in ("Open Source", "Proprietary"):
            filtered_df = filtered_df[filtered_df['Model Type'] == model_type_filter]

        # Sorting
        sort_column = sort_by
        if document_type != "All" and sort_by == 'Overall Accuracy':
            sort_column = f'{document_type} Accuracy'

        if sort_column in filtered_df.columns:
            ascending = (sort_order == "Ascending")
            # For over-detection rate, flip the logic (lower is better)
            if sort_by == "Over-redaction Rate":
                ascending = not ascending
            # Sort on numeric values: a column that mixes numbers with the ''
            # placeholder cannot be ordered directly (str vs float comparison
            # raises TypeError). Unparseable entries become NaN and go last.
            filtered_df = filtered_df.sort_values(
                by=sort_column,
                ascending=ascending,
                na_position='last',
                key=lambda col: pd.to_numeric(col, errors='coerce'),
            )

        return generate_html_table(filtered_df, document_type, sort_by)

    def generate_performance_card(model_name):
        """Generate HTML for the model performance card.

        Args:
            model_name: Value of the 'Model' column to look up. A falsy value
                yields a "please select a model" placeholder, an unknown name
                a "not found" placeholder.

        Returns:
            HTML for the card: headline metrics, the overall rank (by
            'Overall Accuracy', descending) and per-document-type accuracy.
            The rank is "N/A" when the model has no numeric overall accuracy.
        """
        if not model_name:
            return """<div style="text-align: center; color: var(--text-secondary); padding: 40px;">
Please select a model to generate its performance card
</div>"""

        df = load_leaderboard_data()
        model_data = df[df['Model'] == model_name]
        if model_data.empty:
            return """<div style="text-align: center; color: var(--text-secondary); padding: 40px;">
Model not found in the database
</div>"""

        row = model_data.iloc[0]

        # Get overall rank among models that have a numeric overall accuracy.
        df_with_accuracy = df[df['Overall Accuracy'] != ''].copy()
        df_with_accuracy['Overall Accuracy'] = pd.to_numeric(df_with_accuracy['Overall Accuracy'], errors='coerce')
        df_with_accuracy = df_with_accuracy.dropna(subset=['Overall Accuracy'])
        df_sorted = df_with_accuracy.sort_values('Overall Accuracy', ascending=False).reset_index(drop=True)
        positions = df_sorted.index[df_sorted['Model'] == model_name]
        # Explicit emptiness check instead of catching every exception.
        rank_display = f"#{positions[0] + 1}" if len(positions) > 0 else 'N/A'

        # Format values
        def format_value(val, decimals=3, prefix='', suffix=''):
            """Format a numeric value with fixed decimals, or 'N/A' if missing."""
            if pd.isna(val) or val == '':
                return 'N/A'
            return f"{prefix}{float(val):.{decimals}f}{suffix}"

        # Calculate performance stars
        def get_performance_stars(value, max_val=1.0):
            """Map value/max_val to 1-5 stars; a missing value gives no stars."""
            if pd.isna(value) or value == '':
                return ''
            score = float(value) / max_val
            for threshold, stars in ((0.9, 5), (0.8, 4), (0.7, 3), (0.6, 2)):
                if score >= threshold:
                    return '⭐' * stars
            return '⭐'

        # Create HTML
        card_html = f"""
<div class="performance-card">
<div class="card-header">
<h1 class="card-model-name">{model_name}</h1>
<div class="card-stars">
{get_performance_stars(row['Overall Accuracy'])}
</div>
</div>
<div class="metrics-grid" style="margin-bottom: 24px;">
<div class="metric-item">
<div class="metric-icon" style="color: var(--accent-primary);">🏆</div>
<div class="metric-label">Overall Rank</div>
<div class="metric-value">{rank_display}</div>
</div>
<div class="metric-item">
<div class="metric-icon" style="color: var(--accent-primary);">🎯</div>
<div class="metric-label">Overall Accuracy</div>
<div class="metric-value">{format_value(row['Overall Accuracy'])}</div>
</div>
<div class="metric-item">
<div class="metric-icon" style="color: var(--accent-secondary);">📊</div>
<div class="metric-label">Precision</div>
<div class="metric-value">{format_value(row['Precision'])}</div>
</div>
<div class="metric-item">
<div class="metric-icon" style="color: var(--accent-tertiary);">🔍</div>
<div class="metric-label">Recall</div>
<div class="metric-value">{format_value(row['Recall'])}</div>
</div>
<div class="metric-item">
<div class="metric-icon" style="color: var(--accent-quaternary);">💰</div>
<div class="metric-label">Cost/Doc</div>
<div class="metric-value">{format_value(row['Cost per Document ($)'], 3, '$')}</div>
</div>
<div class="metric-item">
<div class="metric-icon" style="color: var(--text-primary);">⚡</div>
<div class="metric-label">Processing Time</div>
<div class="metric-value">{format_value(row['Processing Time (s)'], 1, '', 's')}</div>
</div>
</div>
<div class="domains-section" style="margin-top: 24px;">
<h3 class="domains-title">📄 Document Type Performance</h3>
<div class="domains-grid">
"""

        # Add document type scores
        doc_types = [
            ('🏥', 'Healthcare'),
            ('💰', 'Financial'),
            ('🏛️', 'Government'),
            ('⚖️', 'Legal'),
            ('👤', 'Personal')
        ]
        for doc_icon, doc_type in doc_types:
            accuracy_col = f'{doc_type} Accuracy'
            accuracy_value = row.get(accuracy_col, '')
            if accuracy_value != '' and not pd.isna(accuracy_value):
                score_display = f"{float(accuracy_value):.3f}"
                score_color = "var(--accent-primary)"
            else:
                score_display = "N/A"
                score_color = "var(--text-muted)"
            card_html += f"""
<div class="domain-item">
<div class="domain-name">{doc_icon}</div>
<div style="font-size: 0.7rem; color: var(--text-secondary); margin-bottom: 2px;">{doc_type}</div>
<div class="domain-score" style="color: {score_color};">{score_display}</div>
</div>
"""

        card_html += """
</div>
</div>
<div class="card-footer">
<div class="card-url">
<strong>LLM PII Detection Leaderboard</strong>
</div>
</div>
</div>
"""
        return card_html

    # Load initial data
    initial_df = load_leaderboard_data()
    initial_table = filter_and_sort_data("All", "All", "F1 Score", "Descending")
    model_choices = initial_df['Model'].tolist()
    initial_model = model_choices[0] if model_choices else None

    # Display header
    gr.HTML(HEADER_CONTENT)

    # Main leaderboard section with all filters
    gr.HTML("""
<div class="dark-container" style="margin-bottom: 32px;">
<div class="section-header">
<span class="section-icon" style="color: var(--accent-primary);">📈</span>
<h3 style="margin: 0; color: var(--text-primary); font-size: 1.5rem; font-family: 'Archivo', sans-serif; font-weight: 700;">
PII Detection Performance Leaderboard
</h3>
</div>
<!-- Dataset Reference -->
<div style="background: var(--bg-secondary); border: 1px solid var(--border-subtle); border-radius: 12px; padding: 16px; margin: 16px 0 24px 0;">
<p style="color: var(--text-primary); margin: 0 0 8px 0; font-size: 1rem; font-family: 'Archivo', sans-serif; font-weight: 600;">
📊 <strong>Dataset</strong>: <a href="https://huggingface.co/datasets/nutrientdocs/DocPII-redaction-benchmark" style="color: var(--accent-primary); text-decoration: none;" target="_blank">DocPII: Contextual Redaction Benchmark Dataset</a>
</p>
<p style="color: var(--text-secondary); margin: 0; font-size: 0.95rem; font-family: 'Archivo', sans-serif; line-height: 1.4;">
DocPII contains 1,101 high-quality document samples with embedded PII, designed to evaluate context-aware redaction systems. It provides realistic, full-document contexts across healthcare, finance, and other sectors—a notable advancement over sentence-level datasets.
</p>
</div>
<p style="color: var(--text-secondary); margin-bottom: 20px; font-size: 1.1rem; font-family: 'Archivo', sans-serif;">
Filter by document type, model access, and sort by any metric to explore performance
</p>
<!-- Document Type Filter -->
<div style="margin-bottom: 24px;">
<h4 style="color: var(--text-primary); margin-bottom: 12px; font-size: 1rem;">📄 Document Type</h4>
""")

    document_type_filter = gr.Radio(
        choices=["All", "Healthcare", "Financial", "Government", "Legal", "Personal"],
        value="All",
        label="",
        interactive=True,
        elem_classes=["document-type-radio"]
    )

    gr.HTML("""
</div>
<!-- Other Filters -->
<div style="margin-bottom: 24px;">
<h4 style="color: var(--text-primary); margin-bottom: 12px; font-size: 1rem;">🔍 Filters & Sorting</h4>
</div>
""")

    with gr.Row():
        with gr.Column(scale=1):
            model_type_filter = gr.Radio(
                choices=["All", "Open Source", "Proprietary"],
                value="All",
                label="🔓 Model Access",
                elem_classes=["compact-radio"]
            )
        with gr.Column(scale=1):
            sort_by = gr.Dropdown(
                choices=["F1 Score", "Recall", "Precision", "Overall Accuracy", "Over-redaction Rate", "Cost per Document ($)", "Processing Time (s)"],
                value="F1 Score",
                label="📊 Sort By",
                elem_classes=["dropdown"]
            )
        with gr.Column(scale=1):
            sort_order = gr.Radio(
                choices=["Descending", "Ascending"],
                value="Descending",
                label="🔄 Sort Order",
                elem_classes=["compact-radio"]
            )

    gr.HTML("""
<!-- Leaderboard Table -->
<div style="margin-top: 24px;">
<div class="dataframe-container">
""")

    leaderboard_table = gr.HTML(initial_table)

    gr.HTML("""
</div>
</div>
</div>""")

    # Methodology section
    gr.HTML(f"""
<div class="dark-container" style="margin-top: 32px;">
{METHODOLOGY}
</div>
""")

    # Contribution Section
    gr.HTML("""
<div class="dark-container" style="margin-top: 32px;">
<div class="section-header">
<h3 style="margin: 0; color: var(--text-primary); font-size: 1.5rem; font-family: 'Archivo', sans-serif; font-weight: 700;">
Contribute to the Leaderboard
</h3>
</div>
<div style="background: var(--bg-secondary); border: 1px solid var(--border-subtle); border-radius: 16px; padding: 24px; margin-bottom: 24px;">
<div style="display: grid; grid-template-columns: 1fr 1fr; gap: 2rem; align-items: center;">
<div>
<h4 style="color: var(--accent-primary); margin: 0 0 16px 0; font-size: 1.2rem; font-family: 'Archivo', sans-serif; font-weight: 600;">
Help Improve PII Detection
</h4>
<p style="color: var(--text-primary); margin: 0 0 16px 0; font-size: 1rem; font-family: 'Archivo', sans-serif; line-height: 1.6;">
Join our community and contribute to advancing PII detection capabilities! We encourage researchers and developers to:
</p>
<ul style="color: var(--text-secondary); font-size: 0.95rem; font-family: 'Archivo', sans-serif; line-height: 1.5; margin: 0; padding-left: 20px;">
<li style="margin-bottom: 8px;"><strong>Optimize prompts</strong> with existing models for better performance</li>
<li style="margin-bottom: 8px;"><strong>Test your own models</strong> on the DocPII benchmark dataset</li>
<li style="margin-bottom: 8px;"><strong>Share novel approaches</strong> and techniques for PII detection</li>
<li style="margin-bottom: 8px;"><strong>Experiment with fine-tuning</strong> strategies for document-level context</li>
</ul>
</div>
<div style="text-align: center;">
<div style="background: var(--bg-secondary); border: 1px solid var(--border-subtle); border-radius: 8px; padding: 16px; margin-bottom: 16px;">
<h4 style="color: var(--text-primary); margin: 0 0 8px 0; font-size: 1rem; font-family: 'Archivo', sans-serif; font-weight: 600;">
Example Notebook
</h4>
<p style="color: var(--text-secondary); margin: 0; font-size: 0.85rem; font-family: 'Archivo', sans-serif;">
Ready-to-run evaluation setup
</p>
</div>
<a href="https://colab.research.google.com/drive/1Qs5b85jWzmpFhVO-2mo0BgECCxKAeQIP?usp=sharing"
target="_blank"
rel="noopener noreferrer"
style="display: inline-block; background: var(--bg-secondary); color: var(--text-primary); border: 1px solid var(--accent-primary); padding: 10px 20px; border-radius: 6px; text-decoration: none; font-family: 'Archivo', sans-serif; font-weight: 500; font-size: 0.9rem; transition: all 0.3s ease; hover: background: var(--accent-primary);">
Open in Google Colab
</a>
</div>
</div>
</div>
<div style="background: linear-gradient(135deg, rgba(240, 201, 104, 0.1), rgba(239, 235, 231, 0.1)); border: 1px solid var(--accent-primary); border-radius: 16px; padding: 20px; text-align: center;">
<h4 style="color: var(--accent-primary); margin: 0 0 12px 0; font-size: 1.1rem; font-family: 'Archivo', sans-serif; font-weight: 600;">
How to Submit Your Results
</h4>
<p style="color: var(--text-primary); margin: 0 0 16px 0; font-size: 1rem; font-family: 'Archivo', sans-serif; line-height: 1.5;">
Share your findings with the community! Submit your results along with a Google Colab notebook demonstrating your approach.
</p>
<div style="display: flex; justify-content: center; gap: 16px; flex-wrap: wrap;">
<div style="background: var(--bg-secondary); border: 1px solid var(--border-subtle); border-radius: 8px; padding: 12px 16px; font-size: 0.9rem; font-family: 'Archivo', sans-serif;">
<span style="color: var(--accent-primary); font-weight: 600;">1.</span>
<span style="color: var(--text-secondary);"> Run evaluation</span>
</div>
<div style="background: var(--bg-secondary); border: 1px solid var(--border-subtle); border-radius: 8px; padding: 12px 16px; font-size: 0.9rem; font-family: 'Archivo', sans-serif;">
<span style="color: var(--accent-primary); font-weight: 600;">2.</span>
<span style="color: var(--text-secondary);"> Create Colab notebook</span>
</div>
<div style="background: var(--bg-secondary); border: 1px solid var(--border-subtle); border-radius: 8px; padding: 12px 16px; font-size: 0.9rem; font-family: 'Archivo', sans-serif;">
<span style="color: var(--accent-primary); font-weight: 600;">3.</span>
<span style="color: var(--text-secondary);"> Add Discussion in Community</span>
</div>
</div>
</div>
</div>
""")

    # Performance Card Section
    gr.HTML("""
<div class="dark-container" style="margin-top: 32px;">
<div class="section-header">
<span class="section-icon" style="color: var(--accent-primary);">🎯</span>
<h3 style="margin: 0; color: var(--text-primary); font-size: 1.5rem; font-family: 'Archivo', sans-serif; font-weight: 700;">
Model Performance Cards
</h3>
</div>
<p style="color: var(--text-secondary); margin-bottom: 20px; font-size: 1.1rem; font-family: 'Archivo', sans-serif; text-align: center;">
Dive deep into individual model performance across all metrics and document types
</p>
""")

    card_model_selector = gr.Dropdown(
        choices=model_choices,
        value=initial_model,
        label="🤖 Select Model",
        info="Choose a model to view its performance card",
        elem_classes=["dropdown"]
    )

    gr.HTML("""
</div>
</div>
<div style="width: 100%;">
""")

    # Card display area
    initial_card_html = generate_performance_card(initial_model) if initial_model else ""
    card_display = gr.HTML(value=initial_card_html, elem_id="performance-card-html")

    gr.HTML("""
</div>
<div style="text-align: center; margin-top: 24px; padding-top: 12px; border-top: 1px solid var(--border-subtle);">
<span style="color: var(--text-secondary); font-family: 'Archivo', sans-serif;">Powered by <a href=\"https://nutrient.io\" target=\"_blank\" rel=\"noopener noreferrer\" style=\"color: var(--accent-primary); font-weight: 700; text-decoration: none;\">Nutrient</a></span>
</div>
</div>
</div>""")

    # Add performance card CSS. This is an f-string, so every literal CSS
    # brace is doubled.
    gr.HTML(f"""
<style>
.performance-card {{
background: var(--bg-card);
border: 2px solid var(--accent-primary);
border-radius: 24px;
padding: 32px;
max-width: 700px;
margin: 0 auto;
position: relative;
overflow: hidden;
box-shadow:
0 20px 40px rgba(0, 0, 0, 0.5),
0 0 80px rgba(240, 201, 104, 0.2),
inset 0 0 120px rgba(240, 201, 104, 0.08);
}}
.card-header {{
text-align: center;
margin-bottom: 24px;
position: relative;
z-index: 1;
}}
.card-model-name {{
font-size: 2rem;
font-weight: 800;
color: var(--text-primary);
margin-bottom: 8px;
line-height: 1.2;
font-family: 'Archivo', sans-serif;
}}
.card-stars {{
font-size: 1.2rem;
margin: 8px 0;
}}
.metrics-grid {{
display: grid;
grid-template-columns: repeat(2, 1fr);
gap: 16px;
margin: 24px 0;
}}
.metric-item {{
display: flex;
flex-direction: column;
align-items: center;
padding: 16px;
background: rgba(239, 235, 231, 0.05);
border-radius: 12px;
border: 1px solid var(--border-subtle);
transition: all 0.3s ease;
}}
.metric-item:hover {{
transform: translateY(-2px);
border-color: var(--accent-primary);
box-shadow: 0 8px 16px rgba(240, 201, 104, 0.25);
}}
.metric-icon {{
font-size: 1.5rem;
margin-bottom: 8px;
}}
.metric-label {{
font-size: 0.85rem;
color: var(--text-secondary);
margin-bottom: 4px;
text-align: center;
}}
.metric-value {{
font-size: 1.1rem;
font-weight: 700;
color: var(--text-primary);
text-align: center;
}}
.domains-section {{
margin-top: 24px;
}}
.domains-title {{
color: var(--text-primary);
font-size: 1.2rem;
margin-bottom: 16px;
text-align: center;
}}
.domains-grid {{
display: grid;
grid-template-columns: repeat(5, 1fr);
gap: 12px;
}}
.domain-item {{
display: flex;
flex-direction: column;
align-items: center;
padding: 12px;
background: rgba(239, 235, 231, 0.03);
border-radius: 8px;
border: 1px solid var(--border-subtle);
transition: all 0.3s ease;
}}
.domain-item:hover {{
border-color: var(--accent-primary);
transform: scale(1.02);
}}
.domain-name {{
font-size: 1.2rem;
margin-bottom: 4px;
}}
.domain-score {{
font-size: 0.9rem;
font-weight: 600;
}}
.card-footer {{
text-align: center;
margin-top: 24px;
padding-top: 16px;
border-top: 1px solid var(--border-subtle);
}}
.card-url {{
color: var(--text-secondary);
font-size: 0.9rem;
}}
/* Additional styling for radio buttons and specific components */
.document-type-radio .wrap {{
display: flex !important;
gap: 12px !important;
flex-wrap: wrap !important;
justify-content: center !important;
}}
.document-type-radio .wrap > label {{
flex: 1 !important;
min-width: 140px !important;
max-width: 180px !important;
padding: 12px 16px !important;
background: var(--bg-card) !important;
border: 2px solid var(--border-default) !important;
border-radius: 12px !important;
cursor: pointer !important;
transition: all 0.3s ease !important;
text-align: center !important;
font-weight: 500 !important;
}}
.document-type-radio .wrap > label:hover {{
border-color: var(--accent-primary) !important;
transform: translateY(-2px) !important;
}}
.document-type-radio .wrap > label:has(input[type="radio"]:checked) {{
background: transparent !important;
border-color: var(--accent-primary) !important;
color: var(--text-primary) !important;
font-weight: 600 !important;
box-shadow: 0 8px 16px var(--glow-primary) !important;
}}
.document-type-radio input[type="radio"] {{
display: none !important;
}}
.compact-radio .wrap > label {{
padding: 8px 12px !important;
font-size: 0.85rem !important;
min-width: auto !important;
max-width: 120px !important;
}}
</style>
""")

    # Update functions
    def update_table(*args):
        """Re-render the leaderboard table from the current filter values."""
        return filter_and_sort_data(*args)

    def update_card(model_name):
        """Re-render the performance card for the selected model."""
        return generate_performance_card(model_name)

    # Connect update functions to components: a change to any of the four
    # controls re-renders the table using the values of all four.
    filter_inputs = [document_type_filter, model_type_filter, sort_by, sort_order]
    for input_component in filter_inputs:
        input_component.change(
            fn=update_table,
            inputs=filter_inputs,
            outputs=[leaderboard_table]
        )

    # Update card when model selection changes
    card_model_selector.change(
        fn=update_card,
        inputs=[card_model_selector],
        outputs=[card_display]
    )
def create_app():
    """Build and return the Gradio ``Blocks`` application.

    The leaderboard UI is created by ``create_pii_leaderboard`` while the
    ``Blocks`` object is the active context.
    """
    blocks = gr.Blocks(
        theme=gr.themes.Default(),
        title="🔒 LLM PII Detection Leaderboard",
    )
    with blocks:
        create_pii_leaderboard()
    return blocks
# Script entry point: build the app and start the Gradio server.
if __name__ == "__main__":
    demo = create_app()
    demo.launch()