Luis Kalckstein
New benchmarking results from improved dataset and contribution notebook
ebc9315
unverified
| import gradio as gr | |
| import pandas as pd | |
| import tempfile | |
| import os | |
| from data_loader import ( | |
| load_data, | |
| PII_CATEGORIES, | |
| HEADER_CONTENT, | |
| METHODOLOGY, | |
| COLORS, | |
| MODEL_TYPES | |
| ) | |
def get_rank_badge(rank):
    """Return the HTML snippet shown in the leaderboard's rank column.

    Ranks 1-3 are rendered as a coloured "1st"/"2nd"/"3rd" pill using the
    brand palette in ``COLORS``; every other rank is rendered as the bare
    value in the secondary text colour.
    """
    # (label, background colour, text colour) for the podium positions.
    badge_styles = {
        1: ("1st", COLORS['digital_pollen'], COLORS['warm_black']),
        2: ("2nd", COLORS['soft_grey'], COLORS['black']),
        3: ("3rd", COLORS['code_coral'], COLORS['black']),
    }

    podium = badge_styles.get(rank)
    if podium is None:
        # Not a podium place: plain, unstyled number.
        return f"""
        <div style="
            display: inline-flex;
            align-items: center;
            justify-content: center;
            min-width: 28px;
            color: var(--text-secondary);
            font-weight: 500;
        ">
            {rank}
        </div>
        """

    label, bg_color, text_color = podium
    return f"""
        <div style="
            display: inline-flex;
            align-items: center;
            justify-content: center;
            min-width: 48px;
            padding: 4px 12px;
            background: {bg_color};
            color: {text_color} !important;
            border-radius: 6px;
            font-weight: 600;
            font-size: 0.9em;
            box-shadow: 0 2px 4px rgba(0, 0, 0, 0.2);
            font-family: 'Archivo', sans-serif;
        ">
            {label}
        </div>
        """
def get_type_badge(model_type):
    """Return the HTML pill that labels a model's type.

    ``'Proprietary'`` gets the ``digital_pollen`` background; any other value
    gets ``data_green``. The label text is ``model_type`` itself.
    """
    if model_type == 'Proprietary':
        bg_color = COLORS['digital_pollen']
    else:
        bg_color = COLORS['data_green']

    # Text is always black for readability on the brand accent backgrounds.
    return f"""
    <div style="
        display: inline-flex;
        align-items: center;
        padding: 4px 8px;
        background: {bg_color};
        color: #000000 !important;
        border-radius: 4px;
        font-size: 0.85em;
        font-weight: 600;
        font-family: 'Archivo', sans-serif;
    ">
        {model_type}
    </div>
    """
def get_score_bar(score, is_inverse=False):
    """Return HTML for a horizontal score bar followed by the numeric value.

    Args:
        score: The metric value. NaN/None and the empty string are treated
            as 0; anything else is converted with ``float()``.
        is_inverse: When True the colouring is flipped so that values above
            0.5 are shown in ``code_coral`` and the rest in ``data_green``
            (used for rates where lower is better).

    Returns:
        An HTML string containing the bar and the score formatted to three
        decimal places.

    Raises:
        ValueError: If ``score`` is a non-empty string that ``float()``
            cannot parse.
    """
    if pd.isna(score) or score == '':
        score = 0
    else:
        score = float(score)

    # Clamp only the drawn width: a score outside [0, 1] would otherwise
    # produce a negative or >100% CSS width. The printed number and the
    # colour threshold still use the unclamped score.
    width = min(max(score, 0.0), 1.0) * 100

    # For over-detection rate, use inverse coloring (lower is better)
    if is_inverse:
        bar_color = COLORS['code_coral'] if score > 0.5 else COLORS['data_green']
    else:
        bar_color = COLORS['data_green'] if score > 0.5 else COLORS['code_coral']

    return f"""
    <div style="display: flex; align-items: center; gap: 12px; width: 100%;">
        <div style="
            flex-grow: 1;
            height: 8px;
            background: rgba(239, 235, 231, 0.1);
            border-radius: 4px;
            overflow: hidden;
            max-width: 200px;
        ">
            <div style="
                width: {width}%;
                height: 100%;
                background: {bar_color};
                border-radius: 4px;
                transition: width 0.3s ease;
            "></div>
        </div>
        <span style="
            font-family: 'SF Mono', monospace;
            font-weight: 700;
            color: #000000;
            min-width: 60px;
        ">{score:.3f}</span>
    </div>
    """
def create_pii_leaderboard():
    """Create the main PII detection leaderboard interface.

    Declares all Gradio components for the page (header, filter controls,
    leaderboard table, methodology, contribution section, performance card
    and supporting CSS) and wires the filter/selector ``change`` events to
    the table and card renderers. It is called from ``create_app`` inside a
    ``gr.Blocks`` context and returns nothing.
    """

    def load_leaderboard_data():
        """Load and prepare the leaderboard data."""
        return load_data()

    def is_missing(value):
        """Return True for the '' placeholder or a NaN/None cell."""
        return value == '' or pd.isna(value)

    def score_cell(value, is_inverse=False):
        """Return a table cell with a score bar, or a dash when the value is missing."""
        if is_missing(value):
            return '<td class="numeric-cell">-</td>'
        return f'<td class="score-cell">{get_score_bar(value, is_inverse=is_inverse)}</td>'

    def generate_html_table(filtered_df, document_type, sort_by):
        """Generate the styled HTML table with rank badges and score bars.

        Args:
            filtered_df: Rows to render, already filtered and sorted; the
                rank shown is the row's position in this frame.
            document_type: "All" or a document type name. For a specific
                type the accuracy column shows ``'<type> Accuracy'`` and
                falls back to ``'Overall Accuracy'`` when that key is absent.
            sort_by: Accepted for interface compatibility; not used here.

        Returns:
            The table as a single HTML string.
        """
        # Collect fragments and join once instead of repeated string +=.
        parts = ["""
        <div class="v2-table-container">
            <table class="v2-styled-table">
                <thead>
                    <tr>
                        <th style="width: 80px;">Rank</th>
                        <th style="width: 250px;">Model</th>
                        <th style="width: 150px;">F1 Score</th>
                        <th style="width: 150px;">Recall</th>
                        <th style="width: 150px;">Precision</th>
                        <th style="width: 200px;">Overall Accuracy</th>
                        <th style="width: 160px;">Over-detection Rate</th>
                        <th>Cost/Doc ($)</th>
                        <th>Time (s)</th>
                        <th style="width: 120px;">Type</th>
                        <th>Vendor</th>
                    </tr>
                </thead>
                <tbody>
        """]

        for rank, (_, row) in enumerate(filtered_df.iterrows(), start=1):
            parts.append(f"""
                    <tr>
                        <td>{get_rank_badge(rank)}</td>
                        <td class="model-name" style="color:#EFEBE7; font-weight:700;">{row['Model']}</td>
            """)

            # Get appropriate values based on document type filter
            if document_type != "All":
                accuracy = row.get(f'{document_type} Accuracy', row.get('Overall Accuracy', ''))
            else:
                accuracy = row.get('Overall Accuracy', '')

            # Score bars in column order: F1, Recall, Precision, Accuracy,
            # then the over-redaction rate with inverse colouring.
            parts.append(score_cell(row.get('F1 Score', '')))
            parts.append(score_cell(row.get('Recall', '')))
            parts.append(score_cell(row.get('Precision', '')))
            parts.append(score_cell(accuracy))
            parts.append(score_cell(row.get('Over-redaction Rate', ''), is_inverse=True))

            # Format cost and time; missing (blank or NaN) values show a
            # dash rather than "$nan" / "nan".
            cost = row.get('Cost per Document ($)', '')
            seconds = row.get('Processing Time (s)', '')
            cost_display = '-' if is_missing(cost) else f'${float(cost):.3f}'
            time_display = '-' if is_missing(seconds) else f'{float(seconds):.1f}'

            parts.append(f"""
                        <td class="numeric-cell">{cost_display}</td>
                        <td class="numeric-cell">{time_display}</td>
                        <td>{get_type_badge(row['Model Type'])}</td>
                        <td>{row['Vendor']}</td>
                    </tr>
            """)

        parts.append("""
                </tbody>
            </table>
        </div>
        """)
        return "".join(parts)

    def filter_and_sort_data(document_type, model_type_filter, sort_by, sort_order):
        """Filter and sort the leaderboard data, then render it as HTML.

        Args:
            document_type: "All" or a document type; a specific type keeps
                only rows with a value in ``'<type> Accuracy'``.
            model_type_filter: "All", "Open Source" or "Proprietary".
            sort_by: Column to sort on. 'Overall Accuracy' is redirected to
                the per-document-type accuracy column when a type is selected.
            sort_order: "Ascending" or anything else for descending.

        Returns:
            The HTML table produced by ``generate_html_table``.
        """
        filtered_df = load_leaderboard_data().copy()

        # Document type filtering: only show models that have data for it.
        if document_type != "All":
            doc_col = f'{document_type} Accuracy'
            if doc_col in filtered_df.columns:
                has_value = (filtered_df[doc_col] != '') & filtered_df[doc_col].notna()
                filtered_df = filtered_df[has_value]

        # Model type filtering ("All" or any unknown value keeps every row).
        if model_type_filter in ("Open Source", "Proprietary"):
            filtered_df = filtered_df[filtered_df['Model Type'] == model_type_filter]

        # Sorting
        sort_column = sort_by
        if document_type != "All" and sort_by == 'Overall Accuracy':
            sort_column = f'{document_type} Accuracy'

        if sort_column in filtered_df.columns:
            ascending = (sort_order == "Ascending")
            # For over-redaction rate, flip the logic (lower is better)
            if sort_by == "Over-redaction Rate":
                ascending = not ascending
            # Compare numerically: columns may hold '' placeholders or
            # numeric strings, which would otherwise raise or sort
            # lexicographically. Unparseable cells become NaN and go last.
            filtered_df = filtered_df.sort_values(
                by=sort_column,
                ascending=ascending,
                na_position='last',
                key=lambda col: pd.to_numeric(col, errors='coerce'),
            )

        return generate_html_table(filtered_df, document_type, sort_by)

    def generate_performance_card(model_name):
        """Generate HTML for the performance card of one model.

        Args:
            model_name: Value of the 'Model' column to look up. A falsy
                value yields a "please select a model" placeholder.

        Returns:
            The card HTML, or a placeholder message when no model is
            selected or the name is not present in the data.
        """
        if not model_name:
            return """<div style="text-align: center; color: var(--text-secondary); padding: 40px;">
            Please select a model to generate its performance card
            </div>"""

        df = load_leaderboard_data()
        model_data = df[df['Model'] == model_name]
        if model_data.empty:
            return """<div style="text-align: center; color: var(--text-secondary); padding: 40px;">
            Model not found in the database
            </div>"""

        row = model_data.iloc[0]

        # Overall rank = 1-based position when sorted by Overall Accuracy
        # (descending) among rows whose accuracy is not the '' placeholder.
        ranked = df[df['Overall Accuracy'] != ''].copy()
        ranked['Overall Accuracy'] = pd.to_numeric(ranked['Overall Accuracy'], errors='coerce')
        ranked = ranked.sort_values('Overall Accuracy', ascending=False).reset_index(drop=True)
        positions = ranked.index[ranked['Model'] == model_name]
        # The model is absent from the ranking when its accuracy is blank;
        # show "N/A" instead of swallowing an exception and printing "#N/A".
        rank_display = f"#{positions[0] + 1}" if len(positions) > 0 else "N/A"

        def format_value(val, decimals=3, prefix='', suffix=''):
            """Format a numeric cell, or return 'N/A' when it is missing."""
            if pd.isna(val) or val == '':
                return 'N/A'
            return f"{prefix}{float(val):.{decimals}f}{suffix}"

        def get_performance_stars(value, max_val=1.0):
            """Map value/max_val to 1-5 stars (>=0.9 gives 5, below 0.6 gives 1); none if missing."""
            if pd.isna(value) or value == '':
                return ''
            score = float(value) / max_val
            if score >= 0.9:
                return '⭐' * 5
            elif score >= 0.8:
                return '⭐' * 4
            elif score >= 0.7:
                return '⭐' * 3
            elif score >= 0.6:
                return '⭐' * 2
            else:
                return '⭐' * 1

        card_parts = [f"""
        <div class="performance-card">
            <div class="card-header">
                <h1 class="card-model-name">{model_name}</h1>
                <div class="card-stars">
                    {get_performance_stars(row['Overall Accuracy'])}
                </div>
            </div>
            <div class="metrics-grid" style="margin-bottom: 24px;">
                <div class="metric-item">
                    <div class="metric-icon" style="color: var(--accent-primary);">🏆</div>
                    <div class="metric-label">Overall Rank</div>
                    <div class="metric-value">{rank_display}</div>
                </div>
                <div class="metric-item">
                    <div class="metric-icon" style="color: var(--accent-primary);">🎯</div>
                    <div class="metric-label">Overall Accuracy</div>
                    <div class="metric-value">{format_value(row['Overall Accuracy'])}</div>
                </div>
                <div class="metric-item">
                    <div class="metric-icon" style="color: var(--accent-secondary);">📊</div>
                    <div class="metric-label">Precision</div>
                    <div class="metric-value">{format_value(row['Precision'])}</div>
                </div>
                <div class="metric-item">
                    <div class="metric-icon" style="color: var(--accent-tertiary);">🔍</div>
                    <div class="metric-label">Recall</div>
                    <div class="metric-value">{format_value(row['Recall'])}</div>
                </div>
                <div class="metric-item">
                    <div class="metric-icon" style="color: var(--accent-quaternary);">💰</div>
                    <div class="metric-label">Cost/Doc</div>
                    <div class="metric-value">{format_value(row['Cost per Document ($)'], 3, '$')}</div>
                </div>
                <div class="metric-item">
                    <div class="metric-icon" style="color: var(--text-primary);">⚡</div>
                    <div class="metric-label">Processing Time</div>
                    <div class="metric-value">{format_value(row['Processing Time (s)'], 1, '', 's')}</div>
                </div>
            </div>
            <div class="domains-section" style="margin-top: 24px;">
                <h3 class="domains-title">📄 Document Type Performance</h3>
                <div class="domains-grid">
        """]

        # Add document type scores
        doc_types = [
            ('🏥', 'Healthcare'),
            ('💰', 'Financial'),
            ('🏛️', 'Government'),
            ('⚖️', 'Legal'),
            ('👤', 'Personal'),
        ]
        for doc_icon, doc_type in doc_types:
            accuracy_value = row.get(f'{doc_type} Accuracy', '')
            if accuracy_value != '' and not pd.isna(accuracy_value):
                score_display = f"{float(accuracy_value):.3f}"
                score_color = "var(--accent-primary)"
            else:
                score_display = "N/A"
                score_color = "var(--text-muted)"
            card_parts.append(f"""
                    <div class="domain-item">
                        <div class="domain-name">{doc_icon}</div>
                        <div style="font-size: 0.7rem; color: var(--text-secondary); margin-bottom: 2px;">{doc_type}</div>
                        <div class="domain-score" style="color: {score_color};">{score_display}</div>
                    </div>
            """)

        card_parts.append("""
                </div>
            </div>
            <div class="card-footer">
                <div class="card-url">
                    <strong>LLM PII Detection Leaderboard</strong>
                </div>
            </div>
        </div>
        """)
        return "".join(card_parts)

    # Load initial data
    initial_df = load_leaderboard_data()
    initial_table = filter_and_sort_data("All", "All", "F1 Score", "Descending")

    # Display header
    gr.HTML(HEADER_CONTENT)

    # Main leaderboard section with all filters.
    # NOTE(review): several gr.HTML fragments below open a <div> that is only
    # closed by a later gr.HTML call; whether the components in between end up
    # nested inside it depends on how Gradio renders each HTML component —
    # verify in the browser.
    gr.HTML("""
    <div class="dark-container" style="margin-bottom: 32px;">
        <div class="section-header">
            <span class="section-icon" style="color: var(--accent-primary);">📈</span>
            <h3 style="margin: 0; color: var(--text-primary); font-size: 1.5rem; font-family: 'Archivo', sans-serif; font-weight: 700;">
                PII Detection Performance Leaderboard
            </h3>
        </div>
        <!-- Dataset Reference -->
        <div style="background: var(--bg-secondary); border: 1px solid var(--border-subtle); border-radius: 12px; padding: 16px; margin: 16px 0 24px 0;">
            <p style="color: var(--text-primary); margin: 0 0 8px 0; font-size: 1rem; font-family: 'Archivo', sans-serif; font-weight: 600;">
                📊 <strong>Dataset</strong>: <a href="https://huggingface.co/datasets/nutrientdocs/DocPII-redaction-benchmark" style="color: var(--accent-primary); text-decoration: none;" target="_blank">DocPII: Contextual Redaction Benchmark Dataset</a>
            </p>
            <p style="color: var(--text-secondary); margin: 0; font-size: 0.95rem; font-family: 'Archivo', sans-serif; line-height: 1.4;">
                DocPII contains 1,101 high-quality document samples with embedded PII, designed to evaluate context-aware redaction systems. It provides realistic, full-document contexts across healthcare, finance, and other sectors—a notable advancement over sentence-level datasets.
            </p>
        </div>
        <p style="color: var(--text-secondary); margin-bottom: 20px; font-size: 1.1rem; font-family: 'Archivo', sans-serif;">
            Filter by document type, model access, and sort by any metric to explore performance
        </p>
        <!-- Document Type Filter -->
        <div style="margin-bottom: 24px;">
            <h4 style="color: var(--text-primary); margin-bottom: 12px; font-size: 1rem;">📄 Document Type</h4>
    """)

    document_type_filter = gr.Radio(
        choices=["All", "Healthcare", "Financial", "Government", "Legal", "Personal"],
        value="All",
        label="",
        interactive=True,
        elem_classes=["document-type-radio"]
    )

    gr.HTML("""
        </div>
        <!-- Other Filters -->
        <div style="margin-bottom: 24px;">
            <h4 style="color: var(--text-primary); margin-bottom: 12px; font-size: 1rem;">🔍 Filters & Sorting</h4>
        </div>
    """)

    with gr.Row():
        with gr.Column(scale=1):
            model_type_filter = gr.Radio(
                choices=["All", "Open Source", "Proprietary"],
                value="All",
                label="🔓 Model Access",
                elem_classes=["compact-radio"]
            )
        with gr.Column(scale=1):
            sort_by = gr.Dropdown(
                choices=["F1 Score", "Recall", "Precision", "Overall Accuracy", "Over-redaction Rate", "Cost per Document ($)", "Processing Time (s)"],
                value="F1 Score",
                label="📊 Sort By",
                elem_classes=["dropdown"]
            )
        with gr.Column(scale=1):
            sort_order = gr.Radio(
                choices=["Descending", "Ascending"],
                value="Descending",
                label="🔄 Sort Order",
                elem_classes=["compact-radio"]
            )

    gr.HTML("""
        <!-- Leaderboard Table -->
        <div style="margin-top: 24px;">
            <div class="dataframe-container">
    """)

    leaderboard_table = gr.HTML(initial_table)

    gr.HTML("""
            </div>
        </div>
    </div>""")

    # Methodology section
    gr.HTML(f"""
    <div class="dark-container" style="margin-top: 32px;">
        {METHODOLOGY}
    </div>
    """)

    # Contribution Section
    # NOTE(review): the "hover: background: ..." declaration in the Colab
    # link's inline style is not valid CSS (pseudo-classes cannot be used in a
    # style attribute); a real hover effect would need a stylesheet rule.
    gr.HTML("""
    <div class="dark-container" style="margin-top: 32px;">
        <div class="section-header">
            <h3 style="margin: 0; color: var(--text-primary); font-size: 1.5rem; font-family: 'Archivo', sans-serif; font-weight: 700;">
                Contribute to the Leaderboard
            </h3>
        </div>
        <div style="background: var(--bg-secondary); border: 1px solid var(--border-subtle); border-radius: 16px; padding: 24px; margin-bottom: 24px;">
            <div style="display: grid; grid-template-columns: 1fr 1fr; gap: 2rem; align-items: center;">
                <div>
                    <h4 style="color: var(--accent-primary); margin: 0 0 16px 0; font-size: 1.2rem; font-family: 'Archivo', sans-serif; font-weight: 600;">
                        Help Improve PII Detection
                    </h4>
                    <p style="color: var(--text-primary); margin: 0 0 16px 0; font-size: 1rem; font-family: 'Archivo', sans-serif; line-height: 1.6;">
                        Join our community and contribute to advancing PII detection capabilities! We encourage researchers and developers to:
                    </p>
                    <ul style="color: var(--text-secondary); font-size: 0.95rem; font-family: 'Archivo', sans-serif; line-height: 1.5; margin: 0; padding-left: 20px;">
                        <li style="margin-bottom: 8px;"><strong>Optimize prompts</strong> with existing models for better performance</li>
                        <li style="margin-bottom: 8px;"><strong>Test your own models</strong> on the DocPII benchmark dataset</li>
                        <li style="margin-bottom: 8px;"><strong>Share novel approaches</strong> and techniques for PII detection</li>
                        <li style="margin-bottom: 8px;"><strong>Experiment with fine-tuning</strong> strategies for document-level context</li>
                    </ul>
                </div>
                <div style="text-align: center;">
                    <div style="background: var(--bg-secondary); border: 1px solid var(--border-subtle); border-radius: 8px; padding: 16px; margin-bottom: 16px;">
                        <h4 style="color: var(--text-primary); margin: 0 0 8px 0; font-size: 1rem; font-family: 'Archivo', sans-serif; font-weight: 600;">
                            Example Notebook
                        </h4>
                        <p style="color: var(--text-secondary); margin: 0; font-size: 0.85rem; font-family: 'Archivo', sans-serif;">
                            Ready-to-run evaluation setup
                        </p>
                    </div>
                    <a href="https://colab.research.google.com/drive/1Qs5b85jWzmpFhVO-2mo0BgECCxKAeQIP?usp=sharing"
                       target="_blank"
                       rel="noopener noreferrer"
                       style="display: inline-block; background: var(--bg-secondary); color: var(--text-primary); border: 1px solid var(--accent-primary); padding: 10px 20px; border-radius: 6px; text-decoration: none; font-family: 'Archivo', sans-serif; font-weight: 500; font-size: 0.9rem; transition: all 0.3s ease; hover: background: var(--accent-primary);">
                        Open in Google Colab
                    </a>
                </div>
            </div>
        </div>
        <div style="background: linear-gradient(135deg, rgba(240, 201, 104, 0.1), rgba(239, 235, 231, 0.1)); border: 1px solid var(--accent-primary); border-radius: 16px; padding: 20px; text-align: center;">
            <h4 style="color: var(--accent-primary); margin: 0 0 12px 0; font-size: 1.1rem; font-family: 'Archivo', sans-serif; font-weight: 600;">
                How to Submit Your Results
            </h4>
            <p style="color: var(--text-primary); margin: 0 0 16px 0; font-size: 1rem; font-family: 'Archivo', sans-serif; line-height: 1.5;">
                Share your findings with the community! Submit your results along with a Google Colab notebook demonstrating your approach.
            </p>
            <div style="display: flex; justify-content: center; gap: 16px; flex-wrap: wrap;">
                <div style="background: var(--bg-secondary); border: 1px solid var(--border-subtle); border-radius: 8px; padding: 12px 16px; font-size: 0.9rem; font-family: 'Archivo', sans-serif;">
                    <span style="color: var(--accent-primary); font-weight: 600;">1.</span>
                    <span style="color: var(--text-secondary);"> Run evaluation</span>
                </div>
                <div style="background: var(--bg-secondary); border: 1px solid var(--border-subtle); border-radius: 8px; padding: 12px 16px; font-size: 0.9rem; font-family: 'Archivo', sans-serif;">
                    <span style="color: var(--accent-primary); font-weight: 600;">2.</span>
                    <span style="color: var(--text-secondary);"> Create Colab notebook</span>
                </div>
                <div style="background: var(--bg-secondary); border: 1px solid var(--border-subtle); border-radius: 8px; padding: 12px 16px; font-size: 0.9rem; font-family: 'Archivo', sans-serif;">
                    <span style="color: var(--accent-primary); font-weight: 600;">3.</span>
                    <span style="color: var(--text-secondary);"> Add Discussion in Community</span>
                </div>
            </div>
        </div>
    </div>
    """)

    # Performance Card Section
    gr.HTML("""
    <div class="dark-container" style="margin-top: 32px;">
        <div class="section-header">
            <span class="section-icon" style="color: var(--accent-primary);">🎯</span>
            <h3 style="margin: 0; color: var(--text-primary); font-size: 1.5rem; font-family: 'Archivo', sans-serif; font-weight: 700;">
                Model Performance Cards
            </h3>
        </div>
        <p style="color: var(--text-secondary); margin-bottom: 20px; font-size: 1.1rem; font-family: 'Archivo', sans-serif; text-align: center;">
            Dive deep into individual model performance across all metrics and document types
        </p>
    """)

    # The first model in the data is preselected (None when there are no rows).
    model_names = initial_df['Model'].tolist()
    initial_model = model_names[0] if model_names else None

    card_model_selector = gr.Dropdown(
        choices=model_names,
        value=initial_model,
        label="🤖 Select Model",
        info="Choose a model to view its performance card",
        elem_classes=["dropdown"]
    )

    gr.HTML("""
        </div>
    </div>
    <div style="width: 100%;">
    """)

    # Card display area
    initial_card_html = generate_performance_card(initial_model) if initial_model else ""
    card_display = gr.HTML(value=initial_card_html, elem_id="performance-card-html")

    gr.HTML("""
    </div>
    <div style="text-align: center; margin-top: 24px; padding-top: 12px; border-top: 1px solid var(--border-subtle);">
        <span style="color: var(--text-secondary); font-family: 'Archivo', sans-serif;">Powered by <a href="https://nutrient.io" target="_blank" rel="noopener noreferrer" style="color: var(--accent-primary); font-weight: 700; text-decoration: none;">Nutrient</a></span>
    </div>
    </div>
    </div>""")

    # Add performance card CSS (plain string: nothing is interpolated).
    gr.HTML("""
    <style>
    .performance-card {
        background: var(--bg-card);
        border: 2px solid var(--accent-primary);
        border-radius: 24px;
        padding: 32px;
        max-width: 700px;
        margin: 0 auto;
        position: relative;
        overflow: hidden;
        box-shadow:
            0 20px 40px rgba(0, 0, 0, 0.5),
            0 0 80px rgba(240, 201, 104, 0.2),
            inset 0 0 120px rgba(240, 201, 104, 0.08);
    }
    .card-header {
        text-align: center;
        margin-bottom: 24px;
        position: relative;
        z-index: 1;
    }
    .card-model-name {
        font-size: 2rem;
        font-weight: 800;
        color: var(--text-primary);
        margin-bottom: 8px;
        line-height: 1.2;
        font-family: 'Archivo', sans-serif;
    }
    .card-stars {
        font-size: 1.2rem;
        margin: 8px 0;
    }
    .metrics-grid {
        display: grid;
        grid-template-columns: repeat(2, 1fr);
        gap: 16px;
        margin: 24px 0;
    }
    .metric-item {
        display: flex;
        flex-direction: column;
        align-items: center;
        padding: 16px;
        background: rgba(239, 235, 231, 0.05);
        border-radius: 12px;
        border: 1px solid var(--border-subtle);
        transition: all 0.3s ease;
    }
    .metric-item:hover {
        transform: translateY(-2px);
        border-color: var(--accent-primary);
        box-shadow: 0 8px 16px rgba(240, 201, 104, 0.25);
    }
    .metric-icon {
        font-size: 1.5rem;
        margin-bottom: 8px;
    }
    .metric-label {
        font-size: 0.85rem;
        color: var(--text-secondary);
        margin-bottom: 4px;
        text-align: center;
    }
    .metric-value {
        font-size: 1.1rem;
        font-weight: 700;
        color: var(--text-primary);
        text-align: center;
    }
    .domains-section {
        margin-top: 24px;
    }
    .domains-title {
        color: var(--text-primary);
        font-size: 1.2rem;
        margin-bottom: 16px;
        text-align: center;
    }
    .domains-grid {
        display: grid;
        grid-template-columns: repeat(5, 1fr);
        gap: 12px;
    }
    .domain-item {
        display: flex;
        flex-direction: column;
        align-items: center;
        padding: 12px;
        background: rgba(239, 235, 231, 0.03);
        border-radius: 8px;
        border: 1px solid var(--border-subtle);
        transition: all 0.3s ease;
    }
    .domain-item:hover {
        border-color: var(--accent-primary);
        transform: scale(1.02);
    }
    .domain-name {
        font-size: 1.2rem;
        margin-bottom: 4px;
    }
    .domain-score {
        font-size: 0.9rem;
        font-weight: 600;
    }
    .card-footer {
        text-align: center;
        margin-top: 24px;
        padding-top: 16px;
        border-top: 1px solid var(--border-subtle);
    }
    .card-url {
        color: var(--text-secondary);
        font-size: 0.9rem;
    }
    /* Additional styling for radio buttons and specific components */
    .document-type-radio .wrap {
        display: flex !important;
        gap: 12px !important;
        flex-wrap: wrap !important;
        justify-content: center !important;
    }
    .document-type-radio .wrap > label {
        flex: 1 !important;
        min-width: 140px !important;
        max-width: 180px !important;
        padding: 12px 16px !important;
        background: var(--bg-card) !important;
        border: 2px solid var(--border-default) !important;
        border-radius: 12px !important;
        cursor: pointer !important;
        transition: all 0.3s ease !important;
        text-align: center !important;
        font-weight: 500 !important;
    }
    .document-type-radio .wrap > label:hover {
        border-color: var(--accent-primary) !important;
        transform: translateY(-2px) !important;
    }
    .document-type-radio .wrap > label:has(input[type="radio"]:checked) {
        background: transparent !important;
        border-color: var(--accent-primary) !important;
        color: var(--text-primary) !important;
        font-weight: 600 !important;
        box-shadow: 0 8px 16px var(--glow-primary) !important;
    }
    .document-type-radio input[type="radio"] {
        display: none !important;
    }
    .compact-radio .wrap > label {
        padding: 8px 12px !important;
        font-size: 0.85rem !important;
        min-width: auto !important;
        max-width: 120px !important;
    }
    </style>
    """)

    # Update functions (thin wrappers kept so the registered callback names
    # stay `update_table` / `update_card`).
    def update_table(*args):
        return filter_and_sort_data(*args)

    def update_card(model_name):
        return generate_performance_card(model_name)

    # Connect update functions to components: a change to any filter
    # re-renders the table from the current values of all four controls.
    filter_inputs = [document_type_filter, model_type_filter, sort_by, sort_order]
    for input_component in filter_inputs:
        input_component.change(
            fn=update_table,
            inputs=filter_inputs,
            outputs=[leaderboard_table]
        )

    # Update card when model selection changes
    card_model_selector.change(
        fn=update_card,
        inputs=[card_model_selector],
        outputs=[card_display]
    )
def create_app():
    """Build and return the Gradio ``Blocks`` app hosting the leaderboard.

    The leaderboard components are declared by ``create_pii_leaderboard``
    while the Blocks context is active.
    """
    app = gr.Blocks(
        theme=gr.themes.Default(),
        title="🔒 LLM PII Detection Leaderboard",
    )
    with app:
        create_pii_leaderboard()
    return app
if __name__ == "__main__":
    # Script entry point: build the Blocks app and launch it. Nothing is
    # built or launched when this module is merely imported.
    demo = create_app()
    demo.launch()