| import gradio as gr |
| import pandas as pd |
| import tempfile |
| import os |
| from data_loader import ( |
| load_data, |
| PII_CATEGORIES, |
| HEADER_CONTENT, |
| METHODOLOGY, |
| COLORS, |
| MODEL_TYPES |
| ) |
|
|
def get_rank_badge(rank):
    """Return the HTML snippet displayed in the leaderboard's rank column.

    Ranks 1, 2 and 3 are rendered as a gradient pill labelled "1st", "2nd"
    or "3rd"; every other rank is rendered as the bare rank value in the
    secondary text colour.
    """
    # (place, label, gradient start, gradient end, text colour)
    podium = {
        place: (label, f"linear-gradient(145deg, {start}, {end})", text_color)
        for place, label, start, end, text_color in (
            (1, "1st", COLORS['digital_pollen'], COLORS['digital_pollen'], COLORS['warm_black']),
            (2, "2nd", COLORS['soft_grey'], COLORS['warm_grey'], COLORS['white']),
            (3, "3rd", COLORS['code_coral'], COLORS['code_coral_dm'], COLORS['white']),
        )
    }

    style = podium.get(rank)
    if style is None:
        # Outside the top three: plain number, no pill.
        return f"""
        <div style="
            display: inline-flex;
            align-items: center;
            justify-content: center;
            min-width: 28px;
            color: var(--text-secondary);
            font-weight: 500;
        ">
            {rank}
        </div>
        """

    label, gradient, text_color = style
    return f"""
        <div style="
            display: inline-flex;
            align-items: center;
            justify-content: center;
            min-width: 48px;
            padding: 4px 12px;
            background: {gradient};
            color: {text_color};
            border-radius: 6px;
            font-weight: 600;
            font-size: 0.9em;
            box-shadow: 0 2px 4px rgba(0, 0, 0, 0.2);
        ">
            {label}
        </div>
        """
|
|
def get_type_badge(model_type):
    """Return the HTML pill that labels a model's access type.

    'Proprietary' gets the ``disc_pink`` background; any other value gets
    ``data_green``. The value itself is used as the pill's text.
    """
    if model_type == 'Proprietary':
        bg_color = COLORS['disc_pink']
    else:
        bg_color = COLORS['data_green']

    return f"""
    <div style="
        display: inline-flex;
        align-items: center;
        padding: 4px 8px;
        background: {bg_color};
        color: white;
        border-radius: 4px;
        font-size: 0.85em;
        font-weight: 500;
    ">
        {model_type}
    </div>
    """
|
|
def get_score_bar(score, is_inverse=False):
    """Return HTML for a horizontal score bar followed by the numeric score.

    Args:
        score: Value to display; it is multiplied by 100 to obtain the bar
            width in percent. NaN/None and the empty string are shown as 0.
        is_inverse: When True the gradient runs from ``data_green`` to
            ``code_coral`` instead of the other way round (used by callers
            for metrics where a lower value is better).

    Returns:
        An HTML string containing the bar and the score formatted to three
        decimal places.

    Raises:
        ValueError: If ``score`` is a non-empty string that is not a number.
    """
    if pd.isna(score) or score == '':
        value = 0.0
    else:
        value = float(score)

    # Clamp so an out-of-range score can never emit an invalid (negative) or
    # overflowing CSS width; the numeric label still shows the real value.
    width = max(0.0, min(100.0, value * 100))

    if is_inverse:
        gradient = f"linear-gradient(90deg, {COLORS['data_green']}, {COLORS['code_coral']})"
    else:
        gradient = f"linear-gradient(90deg, {COLORS['code_coral']}, {COLORS['data_green']})"

    # One decimal is plenty for a 200px bar and avoids float noise such as
    # "85.39999999999999%" in the markup.
    return f"""
    <div style="display: flex; align-items: center; gap: 12px; width: 100%;">
        <div style="
            flex-grow: 1;
            height: 8px;
            background: rgba(239, 235, 231, 0.1);
            border-radius: 4px;
            overflow: hidden;
            max-width: 200px;
        ">
            <div style="
                width: {width:.1f}%;
                height: 100%;
                background: {gradient};
                border-radius: 4px;
                transition: width 0.3s ease;
            "></div>
        </div>
        <span style="
            font-family: 'SF Mono', monospace;
            font-weight: 600;
            color: var(--text-primary);
            min-width: 60px;
        ">{value:.3f}</span>
    </div>
    """
|
|
def create_pii_leaderboard():
    """Build the PII detection leaderboard UI.

    Components are created at call time, so this is meant to run inside an
    open ``gr.Blocks`` context (as ``create_app`` does). It lays out the
    header, the filterable/sortable leaderboard table, the methodology
    section and the per-model performance card, then wires the change
    events that refresh the table and the card.
    """

    def load_leaderboard_data():
        """Return the leaderboard DataFrame produced by ``load_data``."""
        return load_data()

    def is_missing(value):
        """Return True when a cell holds no usable value.

        The rest of this function treats the empty string as the "no data"
        marker; None, NaN and whitespace-only strings are treated the same
        way so they render as a placeholder instead of "0.000" or "$nan".
        """
        if value is None:
            return True
        if isinstance(value, str):
            return not value.strip()
        return bool(pd.isna(value))

    def generate_html_table(filtered_df, document_type, sort_by):
        """Render ``filtered_df`` as the styled leaderboard HTML table.

        Args:
            filtered_df: Rows to display, already filtered and sorted; the
                rank shown is simply the row's 1-based position.
            document_type: "All" or a document type; selects which accuracy
                column is shown (falls back to 'Overall Accuracy' when the
                '<type> Accuracy' column does not exist).
            sort_by: Unused here; kept so existing callers keep working.

        Returns:
            The table as a single HTML string.
        """
        accuracy_col = 'Overall Accuracy'
        if document_type != "All":
            doc_col = f'{document_type} Accuracy'
            if doc_col in filtered_df.columns:
                accuracy_col = doc_col

        # NOTE(review): the header below says "Over-detection Rate" while the
        # data column and the sort dropdown say "Over-redaction Rate" --
        # confirm which wording is intended.
        # The accuracy header names the column actually displayed, so a
        # document-type view is no longer labelled "Overall Accuracy".
        parts = [f"""
        <div class="v2-table-container">
            <table class="v2-styled-table">
                <thead>
                    <tr>
                        <th style="width: 80px;">Rank</th>
                        <th>Model</th>
                        <th style="width: 120px;">Type</th>
                        <th>Vendor</th>
                        <th style="width: 200px;">{accuracy_col}</th>
                        <th style="width: 150px;">Precision</th>
                        <th style="width: 150px;">Recall</th>
                        <th style="width: 150px;">F1 Score</th>
                        <th style="width: 160px;">Over-detection Rate</th>
                        <th>Cost/Doc ($)</th>
                        <th>Time (s)</th>
                    </tr>
                </thead>
                <tbody>
        """]

        # (column name, is_inverse) for every cell drawn as a score bar.
        score_columns = (
            (accuracy_col, False),
            ('Precision', False),
            ('Recall', False),
            ('F1 Score', False),
            ('Over-redaction Rate', True),
        )

        for rank, (_, row) in enumerate(filtered_df.iterrows(), start=1):
            parts.append(f"""
                    <tr>
                        <td>{get_rank_badge(rank)}</td>
                        <td class="model-name">{row['Model']}</td>
                        <td>{get_type_badge(row['Model Type'])}</td>
                        <td>{row['Vendor']}</td>
            """)

            for column, inverse in score_columns:
                value = row.get(column, '')
                if is_missing(value):
                    parts.append('<td class="numeric-cell">-</td>')
                else:
                    bar = get_score_bar(value, is_inverse=inverse)
                    parts.append(f'<td class="score-cell">{bar}</td>')

            cost = row.get('Cost per Document ($)', '')
            seconds = row.get('Processing Time (s)', '')
            cost_display = '-' if is_missing(cost) else f'${float(cost):.3f}'
            time_display = '-' if is_missing(seconds) else f'{float(seconds):.1f}'

            parts.append(f"""
                        <td class="numeric-cell">{cost_display}</td>
                        <td class="numeric-cell">{time_display}</td>
                    </tr>
            """)

        parts.append("""
                </tbody>
            </table>
        </div>
        """)

        # Joined once at the end instead of repeated string concatenation.
        return "".join(parts)

    def filter_and_sort_data(document_type, model_type_filter, sort_by, sort_order):
        """Filter and sort the leaderboard, returning the rendered HTML table.

        Args:
            document_type: "All" or a document type. For a specific type,
                rows without a numeric '<type> Accuracy' value are dropped.
            model_type_filter: "All", "Open Source" or "Proprietary".
            sort_by: Column to sort on. 'Overall Accuracy' is redirected to
                the '<type> Accuracy' column when a document type is chosen.
            sort_order: "Ascending" or anything else for descending.

        Returns:
            The HTML produced by ``generate_html_table``.
        """
        filtered_df = load_leaderboard_data()

        if document_type != "All":
            doc_col = f'{document_type} Accuracy'
            if doc_col in filtered_df.columns:
                # Coercing catches '' as well as NaN / non-numeric cells.
                has_score = pd.to_numeric(filtered_df[doc_col], errors='coerce').notna()
                filtered_df = filtered_df[has_score]

        # Any other value (including "All") leaves the rows unfiltered.
        if model_type_filter in ("Open Source", "Proprietary"):
            filtered_df = filtered_df[filtered_df['Model Type'] == model_type_filter]

        sort_column = sort_by
        if document_type != "All" and sort_by == 'Overall Accuracy':
            sort_column = f'{document_type} Accuracy'

        if sort_column in filtered_df.columns:
            ascending = (sort_order == "Ascending")
            # The requested direction is flipped for this one metric,
            # presumably because a lower over-redaction rate is better.
            if sort_by == "Over-redaction Rate":
                ascending = not ascending
            # Sort on a numeric view of the column: cells may mix '' with
            # numbers, which cannot be compared directly. Unparseable cells
            # become NaN and go last; a stable sort keeps ties deterministic.
            filtered_df = filtered_df.sort_values(
                by=sort_column,
                ascending=ascending,
                na_position='last',
                kind='stable',
                key=lambda col: pd.to_numeric(col, errors='coerce'),
            )

        return generate_html_table(filtered_df, document_type, sort_by)

    def generate_performance_card(model_name):
        """Return the HTML performance card for ``model_name``.

        A placeholder message is returned when no model is selected or the
        name is not present in the data.
        """
        if not model_name:
            return """<div style="text-align: center; color: var(--text-secondary); padding: 40px;">
                Please select a model to generate its performance card
            </div>"""

        df = load_leaderboard_data()
        model_data = df[df['Model'] == model_name]

        if model_data.empty:
            return """<div style="text-align: center; color: var(--text-secondary); padding: 40px;">
                Model not found in the database
            </div>"""

        row = model_data.iloc[0]

        # Overall rank = position when sorted by numeric Overall Accuracy.
        # Rows whose accuracy is empty or unparseable are left unranked.
        ranked = df.copy()
        ranked['Overall Accuracy'] = pd.to_numeric(ranked['Overall Accuracy'], errors='coerce')
        ranked = ranked.dropna(subset=['Overall Accuracy']).sort_values(
            'Overall Accuracy', ascending=False, kind='stable'
        )
        ranked_models = ranked['Model'].tolist()
        if model_name in ranked_models:
            rank_display = f"#{ranked_models.index(model_name) + 1}"
        else:
            rank_display = 'N/A'

        def format_value(val, decimals=3, prefix='', suffix=''):
            """Format a numeric cell, or 'N/A' when it holds no value."""
            if is_missing(val):
                return 'N/A'
            return f"{prefix}{float(val):.{decimals}f}{suffix}"

        def get_performance_stars(value, max_val=1.0):
            """Map value/max_val to 1-5 stars; an empty string if missing."""
            if is_missing(value):
                return ''
            score = float(value) / max_val
            for threshold, stars in ((0.9, 5), (0.8, 4), (0.7, 3), (0.6, 2)):
                if score >= threshold:
                    return '⭐' * stars
            return '⭐'

        parts = [f"""
        <div class="performance-card">
            <div class="card-header">
                <h1 class="card-model-name">{model_name}</h1>
                <div class="card-stars">
                    {get_performance_stars(row['Overall Accuracy'])}
                </div>
            </div>

            <div class="metrics-grid" style="margin-bottom: 24px;">
                <div class="metric-item">
                    <div class="metric-icon" style="color: var(--accent-primary);">🏆</div>
                    <div class="metric-label">Overall Rank</div>
                    <div class="metric-value">{rank_display}</div>
                </div>

                <div class="metric-item">
                    <div class="metric-icon" style="color: var(--accent-primary);">🎯</div>
                    <div class="metric-label">Overall Accuracy</div>
                    <div class="metric-value">{format_value(row['Overall Accuracy'])}</div>
                </div>

                <div class="metric-item">
                    <div class="metric-icon" style="color: var(--accent-secondary);">📊</div>
                    <div class="metric-label">Precision</div>
                    <div class="metric-value">{format_value(row['Precision'])}</div>
                </div>

                <div class="metric-item">
                    <div class="metric-icon" style="color: var(--accent-tertiary);">🔍</div>
                    <div class="metric-label">Recall</div>
                    <div class="metric-value">{format_value(row['Recall'])}</div>
                </div>

                <div class="metric-item">
                    <div class="metric-icon" style="color: var(--accent-quaternary);">💰</div>
                    <div class="metric-label">Cost/Doc</div>
                    <div class="metric-value">{format_value(row['Cost per Document ($)'], 3, '$')}</div>
                </div>

                <div class="metric-item">
                    <div class="metric-icon" style="color: var(--text-primary);">⚡</div>
                    <div class="metric-label">Processing Time</div>
                    <div class="metric-value">{format_value(row['Processing Time (s)'], 1, '', 's')}</div>
                </div>
            </div>

            <div class="domains-section" style="margin-top: 24px;">
                <h3 class="domains-title">📄 Document Type Performance</h3>
                <div class="domains-grid">
        """]

        doc_types = [
            ('🏥', 'Healthcare'),
            ('💰', 'Financial'),
            ('🏛️', 'Government'),
            ('⚖️', 'Legal'),
            ('👤', 'Personal'),
        ]

        for doc_icon, doc_type in doc_types:
            accuracy_value = row.get(f'{doc_type} Accuracy', '')

            if is_missing(accuracy_value):
                score_display = "N/A"
                score_color = "var(--text-muted)"
            else:
                score_display = f"{float(accuracy_value):.3f}"
                score_color = "var(--accent-primary)"

            parts.append(f"""
                    <div class="domain-item">
                        <div class="domain-name">{doc_icon}</div>
                        <div style="font-size: 0.7rem; color: var(--text-secondary); margin-bottom: 2px;">{doc_type}</div>
                        <div class="domain-score" style="color: {score_color};">{score_display}</div>
                    </div>
            """)

        parts.append("""
                </div>
            </div>

            <div class="card-footer">
                <div class="card-url">
                    <strong>LLM PII Detection Leaderboard</strong>
                </div>
            </div>
        </div>
        """)

        return "".join(parts)

    # ---- Initial state ---------------------------------------------------
    initial_df = load_leaderboard_data()
    initial_table = filter_and_sort_data("All", "All", "Overall Accuracy", "Descending")
    model_choices = initial_df['Model'].tolist()
    initial_model = model_choices[0] if model_choices else None

    # ---- Layout ----------------------------------------------------------
    # NOTE(review): several gr.HTML fragments below open a wrapper <div> that
    # is only closed in a later fragment. Each gr.HTML is presumably rendered
    # as its own component, so those wrappers may not actually enclose the
    # Gradio components placed between them -- confirm the intended layout.
    gr.HTML(HEADER_CONTENT)

    gr.HTML("""
    <div class="dark-container" style="margin-bottom: 32px;">
        <div class="section-header">
            <span class="section-icon" style="color: var(--accent-primary);">📈</span>
            <h3 style="margin: 0; color: var(--text-primary); font-size: 1.5rem; font-family: 'Inter', sans-serif; font-weight: 700;">
                PII Detection Performance Leaderboard
            </h3>
        </div>
        <p style="color: var(--text-secondary); margin-bottom: 20px; font-size: 1.1rem; font-family: 'Inter', sans-serif;">
            Filter by document type, model access, and sort by any metric to explore performance
        </p>

        <!-- Document Type Filter -->
        <div style="margin-bottom: 24px;">
            <h4 style="color: var(--text-primary); margin-bottom: 12px; font-size: 1rem;">📄 Document Type</h4>
    """)

    document_type_filter = gr.Radio(
        choices=["All", "Healthcare", "Financial", "Government", "Legal", "Personal"],
        value="All",
        label="",
        interactive=True,
        elem_classes=["document-type-radio"]
    )

    gr.HTML("""
        </div>

        <!-- Other Filters -->
        <div style="margin-bottom: 24px;">
            <h4 style="color: var(--text-primary); margin-bottom: 12px; font-size: 1rem;">🔍 Filters & Sorting</h4>
        </div>
    """)

    with gr.Row():
        with gr.Column(scale=1):
            model_type_filter = gr.Radio(
                choices=["All", "Open Source", "Proprietary"],
                value="All",
                label="🔓 Model Access",
                elem_classes=["compact-radio"]
            )

        with gr.Column(scale=1):
            sort_by = gr.Dropdown(
                choices=["Overall Accuracy", "Precision", "Recall", "F1 Score", "Over-redaction Rate", "Cost per Document ($)", "Processing Time (s)"],
                value="Overall Accuracy",
                label="📊 Sort By",
                elem_classes=["dropdown"]
            )

        with gr.Column(scale=1):
            sort_order = gr.Radio(
                choices=["Descending", "Ascending"],
                value="Descending",
                label="🔄 Sort Order",
                elem_classes=["compact-radio"]
            )

    gr.HTML("""
        <!-- Leaderboard Table -->
        <div style="margin-top: 24px;">
            <div class="dataframe-container">
    """)

    leaderboard_table = gr.HTML(initial_table)

    gr.HTML("""
            </div>
        </div>
    </div>""")

    gr.HTML(f"""
    <div class="dark-container" style="margin-top: 32px;">
        {METHODOLOGY}
    </div>
    """)

    gr.HTML("""
    <div class="dark-container" style="margin-top: 32px;">
        <div class="section-header">
            <span class="section-icon" style="color: var(--accent-primary);">🎯</span>
            <h3 style="margin: 0; color: var(--text-primary); font-size: 1.5rem; font-family: 'Inter', sans-serif; font-weight: 700;">
                Model Performance Cards
            </h3>
        </div>
        <p style="color: var(--text-secondary); margin-bottom: 20px; font-size: 1.1rem; font-family: 'Inter', sans-serif; text-align: center;">
            Dive deep into individual model performance across all metrics and document types
        </p>

    """)

    card_model_selector = gr.Dropdown(
        choices=model_choices,
        value=initial_model,
        label="🤖 Select Model",
        info="Choose a model to view its performance card",
        elem_classes=["dropdown"]
    )

    gr.HTML("""
            </div>
        </div>

        <div style="width: 100%;">
    """)

    initial_card_html = generate_performance_card(initial_model) if initial_model else ""
    card_display = gr.HTML(value=initial_card_html, elem_id="performance-card-html")

    gr.HTML("""
            </div>
        </div>
    </div>""")

    # ---- Styles ----------------------------------------------------------
    # Plain (non-f) string: there is nothing to interpolate, so the CSS
    # braces need no doubling.
    gr.HTML("""
    <style>
    .performance-card {
        background: linear-gradient(145deg, rgba(26, 20, 20, 0.98) 0%, rgba(222, 157, 204, 0.05) 100%);
        border: 2px solid var(--accent-primary);
        border-radius: 24px;
        padding: 32px;
        max-width: 700px;
        margin: 0 auto;
        position: relative;
        overflow: hidden;
        box-shadow:
            0 20px 40px rgba(0, 0, 0, 0.5),
            0 0 80px rgba(222, 157, 204, 0.2),
            inset 0 0 120px rgba(222, 157, 204, 0.05);
    }

    .card-header {
        text-align: center;
        margin-bottom: 24px;
        position: relative;
        z-index: 1;
    }

    .card-model-name {
        font-size: 2rem;
        font-weight: 800;
        background: linear-gradient(135deg, var(--accent-primary) 0%, var(--accent-secondary) 100%);
        -webkit-background-clip: text;
        -webkit-text-fill-color: transparent;
        margin-bottom: 8px;
        text-shadow: 0 0 40px var(--glow-primary);
        line-height: 1.2;
    }

    .card-stars {
        font-size: 1.2rem;
        margin: 8px 0;
    }

    .metrics-grid {
        display: grid;
        grid-template-columns: repeat(2, 1fr);
        gap: 16px;
        margin: 24px 0;
    }

    .metric-item {
        display: flex;
        flex-direction: column;
        align-items: center;
        padding: 16px;
        background: rgba(239, 235, 231, 0.05);
        border-radius: 12px;
        border: 1px solid var(--border-subtle);
        transition: all 0.3s ease;
    }

    .metric-item:hover {
        transform: translateY(-2px);
        border-color: var(--accent-primary);
        box-shadow: 0 8px 16px rgba(222, 157, 204, 0.3);
    }

    .metric-icon {
        font-size: 1.5rem;
        margin-bottom: 8px;
    }

    .metric-label {
        font-size: 0.85rem;
        color: var(--text-secondary);
        margin-bottom: 4px;
        text-align: center;
    }

    .metric-value {
        font-size: 1.1rem;
        font-weight: 700;
        color: var(--text-primary);
        text-align: center;
    }

    .domains-section {
        margin-top: 24px;
    }

    .domains-title {
        color: var(--text-primary);
        font-size: 1.2rem;
        margin-bottom: 16px;
        text-align: center;
    }

    .domains-grid {
        display: grid;
        grid-template-columns: repeat(5, 1fr);
        gap: 12px;
    }

    .domain-item {
        display: flex;
        flex-direction: column;
        align-items: center;
        padding: 12px;
        background: rgba(239, 235, 231, 0.03);
        border-radius: 8px;
        border: 1px solid var(--border-subtle);
        transition: all 0.3s ease;
    }

    .domain-item:hover {
        border-color: var(--accent-primary);
        transform: scale(1.02);
    }

    .domain-name {
        font-size: 1.2rem;
        margin-bottom: 4px;
    }

    .domain-score {
        font-size: 0.9rem;
        font-weight: 600;
    }

    .card-footer {
        text-align: center;
        margin-top: 24px;
        padding-top: 16px;
        border-top: 1px solid var(--border-subtle);
    }

    .card-url {
        color: var(--text-secondary);
        font-size: 0.9rem;
    }

    /* Additional styling for radio buttons and specific components */
    .document-type-radio .wrap {
        display: flex !important;
        gap: 12px !important;
        flex-wrap: wrap !important;
        justify-content: center !important;
    }

    .document-type-radio .wrap > label {
        flex: 1 !important;
        min-width: 140px !important;
        max-width: 180px !important;
        padding: 12px 16px !important;
        background: var(--bg-card) !important;
        border: 2px solid var(--border-default) !important;
        border-radius: 12px !important;
        cursor: pointer !important;
        transition: all 0.3s ease !important;
        text-align: center !important;
        font-weight: 500 !important;
    }

    .document-type-radio .wrap > label:hover {
        border-color: var(--accent-primary) !important;
        transform: translateY(-2px) !important;
    }

    .document-type-radio .wrap > label:has(input[type="radio"]:checked) {
        background: transparent !important;
        border-color: var(--accent-primary) !important;
        color: var(--text-primary) !important;
        font-weight: 600 !important;
        box-shadow: 0 8px 16px var(--glow-primary) !important;
    }

    .document-type-radio input[type="radio"] {
        display: none !important;
    }

    .compact-radio .wrap > label {
        padding: 8px 12px !important;
        font-size: 0.85rem !important;
        min-width: auto !important;
        max-width: 120px !important;
    }
    </style>
    """)

    # ---- Event wiring ----------------------------------------------------
    def update_table(*args):
        """Re-render the table from the current filter/sort control values."""
        return filter_and_sort_data(*args)

    def update_card(model_name):
        """Re-render the performance card for the selected model."""
        return generate_performance_card(model_name)

    filter_inputs = [document_type_filter, model_type_filter, sort_by, sort_order]

    # Changing any one control re-renders the table from all four values.
    for input_component in filter_inputs:
        input_component.change(
            fn=update_table,
            inputs=filter_inputs,
            outputs=[leaderboard_table]
        )

    card_model_selector.change(
        fn=update_card,
        inputs=[card_model_selector],
        outputs=[card_display]
    )
|
|
def create_app():
    """Assemble and return the Gradio Blocks application.

    The leaderboard UI is built by ``create_pii_leaderboard`` while the
    Blocks object is the active context.
    """
    app = gr.Blocks(
        theme=gr.themes.Default(),
        title="🔒 LLM PII Detection Leaderboard",
    )
    with app:
        create_pii_leaderboard()
    return app
|
|
if __name__ == "__main__":
    # Script entry point: build the Blocks app and start the Gradio server
    # with its default launch settings.
    demo = create_app()
    demo.launch()