# PII Detection Leaderboard — Gradio app for a Hugging Face Space.
#
# Displays per-submission evaluation results (exact / partial / IoU-50
# macro metrics) in a styled, read-only DataFrame with a refresh button.

import gradio as gr
import pandas as pd
import numpy as np  # noqa: F401 — kept for downstream tooling that expects it
from datetime import datetime


def get_leaderboard_data():
    """Build the PII-detection leaderboard as a pandas DataFrame.

    The numbers are real outputs from the normalized evaluation script;
    there is one row per (team, submission) pair — currently 45 entries.

    Returns:
        pd.DataFrame: columns Rank, Team, Best Overall Score, and macro
        Exact / Partial / IoU50 F1 / Precision / Recall.
    """
    data = {
        'Rank': list(range(1, 46)),
        'Team': [
            'Premise (submission 3)',
            'Premise (submission 2)',
            'صقور الأرض (submission 9)',
            'Premise',
            'Sebaweeh (submission 11)',
            'صقور الأرض (submission 8)',
            'صقور الأرض (submission 5)',
            'TheConsultants (submission 3)',
            'Dynamic (submission 4)',
            'صقور الأرض (submission 4)',
            'صقور الأرض (submission 3)',
            'Dynamic (submission 3)',
            'The LADS (submission 5)',
            'Sebaweeh (submission 10)',
            'Dynamic (submission 2)',
            'Prophytech-AI (submission 2)',  # New with 0.5341
            'صقور الأرض (submission 1)',
            'Sebaweeh (submission 9)',
            'ByFi (submission 3)',
            'Gang of Four',  # New with 0.5153
            'The LADS (submission 4)',
            'صقور الأرض (submission 2)',
            'Nutoq',  # New with 0.5072
            'Sebaweeh (submission 8)',
            'Dynamic',
            'ByFi',
            'ByFi (submission 2)',
            'TheConsultants',
            'Sebaweeh (submission 7)',
            'Prophytech-AI',
            'The LADS (submission 3)',
            'TheConsultants (submission 2)',
            'SaRA (submission 2)',
            'The LADS (submission 2)',
            'Sebaweeh (submission 6)',
            'Sebaweeh (submission 4)',
            'Why Not',
            'Sebaweeh (submission 5)',
            'The LADS',
            'AEye',
            'Sebaweeh (submission 3)',
            'NICE',
            'SaRA (submission 1)',
            'Sebaweeh (submission 2)',
            'Sebaweeh (submission 1)'
        ],
        # Main ranking metric (highest of the per-type F1 scores).
        # NOTE(review): ranks 11/12 (0.5394 vs 0.5411) violate the descending
        # order — confirm against the source results before re-ranking.
        'Best Overall Score': [
            0.6015, 0.5996, 0.5973, 0.5973, 0.5782, 0.5726, 0.5705, 0.5575,
            0.5522, 0.5506, 0.5394, 0.5411, 0.5359, 0.5358, 0.5344, 0.5341,
            0.5333, 0.5225, 0.5165, 0.5153, 0.5103, 0.5089, 0.5072, 0.5053,
            0.5040, 0.5012, 0.4996, 0.4986, 0.4945, 0.4938, 0.4892, 0.4817,
            0.4406, 0.4145, 0.4095, 0.3938, 0.3845, 0.3519, 0.3346, 0.3180,
            0.2846, 0.2667, 0.2633, 0.2630, 0.2457
        ],
        # Exact Match Metrics (Macro)
        'Exact F1': [
            0.0142, 0.0143, 0.0154, 0.0143, 0.0298, 0.0244, 0.0188, 0.0298,
            0.0298, 0.0244, 0.0188, 0.0256, 0.0106, 0.0241, 0.0239, 0.0239,
            0.0237, 0.0185, 0.0169, 0.0133, 0.0101, 0.0171, 0.0094, 0.0098,
            0.0179, 0.0161, 0.0161, 0.0145, 0.0104, 0.0181, 0.0089, 0.0132,
            0.0113, 0.0079, 0.0096, 0.0075, 0.0088, 0.0076, 0.0077, 0.0081,
            0.0053, 0.0039, 0.0058, 0.0053, 0.0021
        ],
        'Exact Precision': [
            0.015, 0.015, 0.016, 0.015, 0.029, 0.029, 0.023, 0.029,
            0.029, 0.029, 0.023, 0.029, 0.011, 0.029, 0.029, 0.029,
            0.029, 0.023, 0.022, 0.016, 0.013, 0.021, 0.012, 0.014,
            0.020, 0.021, 0.021, 0.018, 0.016, 0.020, 0.012, 0.018,
            0.015, 0.011, 0.011, 0.009, 0.009, 0.006, 0.015, 0.013,
            0.004, 0.003, 0.005, 0.004, 0.001
        ],
        'Exact Recall': [
            0.013, 0.013, 0.015, 0.014, 0.021, 0.021, 0.016, 0.021,
            0.021, 0.021, 0.016, 0.020, 0.010, 0.021, 0.020, 0.020,
            0.020, 0.016, 0.014, 0.011, 0.008, 0.015, 0.008, 0.007,
            0.016, 0.013, 0.013, 0.012, 0.008, 0.017, 0.007, 0.010,
            0.009, 0.006, 0.009, 0.007, 0.008, 0.010, 0.005, 0.006,
            0.011, 0.005, 0.008, 0.010, 0.007
        ],
        # Partial Match Metrics (Macro) — Partial F1 is also the overall score.
        'Partial F1': [
            0.6015, 0.5996, 0.5973, 0.5973, 0.5782, 0.5726, 0.5705, 0.5575,
            0.5522, 0.5506, 0.5394, 0.5411, 0.5359, 0.5358, 0.5344, 0.5341,
            0.5333, 0.5225, 0.5165, 0.5153, 0.5103, 0.5089, 0.5072, 0.5053,
            0.5040, 0.5012, 0.4996, 0.4986, 0.4945, 0.4938, 0.4892, 0.4817,
            0.4406, 0.4145, 0.4095, 0.3938, 0.3845, 0.3519, 0.3346, 0.3180,
            0.2846, 0.2667, 0.2633, 0.2630, 0.2457
        ],
        'Partial Precision': [
            0.647, 0.642, 0.634, 0.637, 0.659, 0.457, 0.655, 0.659,
            0.659, 0.657, 0.646, 0.647, 0.445, 0.636, 0.647, 0.647,
            0.644, 0.630, 0.655, 0.622, 0.669, 0.610, 0.669, 0.740,
            0.560, 0.662, 0.659, 0.634, 0.740, 0.536, 0.669, 0.670,
            0.596, 0.590, 0.456, 0.458, 0.398, 0.280, 0.649, 0.494,
            0.190, 0.231, 0.204, 0.179, 0.143
        ],
        'Partial Recall': [
            0.562, 0.562, 0.565, 0.562, 0.461, 0.495, 0.491, 0.461,
            0.461, 0.488, 0.463, 0.461, 0.408, 0.463, 0.455, 0.455,
            0.455, 0.410, 0.413, 0.440, 0.419, 0.436, 0.408, 0.384,
            0.458, 0.403, 0.402, 0.411, 0.371, 0.457, 0.385, 0.376,
            0.350, 0.319, 0.372, 0.346, 0.372, 0.474, 0.225, 0.234,
            0.569, 0.316, 0.370, 0.495, 0.854
        ],
        # IoU 50% Metrics (Macro)
        'IoU50 F1': [
            0.2518, 0.2557, 0.2571, 0.2543, 0.2584, 0.1867, 0.1867, 0.2684,
            0.2584, 0.2461, 0.2414, 0.2474, 0.2220, 0.2431, 0.2439, 0.2439,
            0.2434, 0.2142, 0.2162, 0.2141, 0.2070, 0.2289, 0.2252, 0.1759,
            0.2088, 0.2170, 0.2165, 0.2118, 0.1717, 0.1992, 0.2100, 0.2071,
            0.1807, 0.1676, 0.1539, 0.1490, 0.1444, 0.1409, 0.1244, 0.1058,
            0.1099, 0.0646, 0.0733, 0.1012, 0.0871
        ],
        'IoU50 Precision': [
            0.271, 0.274, 0.273, 0.271, 0.298, 0.189, 0.187, 0.298,
            0.298, 0.291, 0.289, 0.298, 0.159, 0.289, 0.295, 0.295,
            0.294, 0.264, 0.280, 0.258, 0.276, 0.275, 0.297, 0.258,
            0.232, 0.287, 0.286, 0.269, 0.257, 0.216, 0.287, 0.288,
            0.244, 0.239, 0.171, 0.173, 0.149, 0.112, 0.241, 0.164,
            0.073, 0.056, 0.057, 0.069, 0.051
        ],
        'IoU50 Recall': [
            0.235, 0.240, 0.243, 0.239, 0.218, 0.194, 0.192, 0.218,
            0.218, 0.213, 0.207, 0.218, 0.146, 0.210, 0.208, 0.208,
            0.208, 0.180, 0.176, 0.183, 0.166, 0.196, 0.181, 0.134,
            0.190, 0.175, 0.174, 0.174, 0.129, 0.185, 0.165, 0.162,
            0.143, 0.129, 0.140, 0.131, 0.140, 0.190, 0.084, 0.078,
            0.220, 0.077, 0.103, 0.190, 0.303
        ],
    }

    # Sanity check: every column must have exactly one value per ranked entry.
    # (Derived from the data itself — the previous hardcoded "42" was stale
    # and reported an ERROR for every column.)
    expected = len(data['Rank'])
    for key, values in data.items():
        if len(values) != expected:
            print(f"ERROR: {key} has {len(values)} values, expected {expected}")
        else:
            print(f"✓ {key}: {len(values)} values")

    df = pd.DataFrame(data)

    # Debug output to verify the table was assembled correctly.
    print(f"DataFrame shape: {df.shape}")
    print(f"Number of teams: {len(df)}")
    print(f"Top entry: {df.iloc[0]['Team']} "
          f"(score {df.iloc[0]['Best Overall Score']})")
    print(f"Teams: {df['Team'].tolist()}")
    return df


def format_leaderboard(df):
    """Return a display-ready copy of *df* with columns rendered as strings.

    Score columns are formatted to 4 decimal places; entity-count columns
    (if present) get thousands separators. Missing columns are skipped, so
    this works for both the current 12-column table and any extended one.
    """
    display_df = df.copy()

    # Format score columns to 4 decimal places for precision.
    score_columns = [
        'Best Overall Score',
        'Exact F1', 'Exact Precision', 'Exact Recall',
        'Partial F1', 'Partial Precision', 'Partial Recall',
        'IoU50 F1', 'IoU50 Precision', 'IoU50 Recall',
        'Value F1', 'Value Precision', 'Value Recall',
    ]
    for col in score_columns:
        if col in display_df.columns:
            display_df[col] = display_df[col].apply(lambda x: f"{x:.4f}")

    # Format entity counts with thousands separators.
    entity_columns = ['GT Entities', 'Pred Entities',
                      'TP Exact', 'TP Partial', 'TP IoU50', 'TP Value']
    for col in entity_columns:
        if col in display_df.columns:
            display_df[col] = display_df[col].apply(lambda x: f"{x:,}")

    return display_df


def update_leaderboard():
    """Rebuild and re-format the leaderboard; returns the display DataFrame."""
    df = get_leaderboard_data()
    formatted_df = format_leaderboard(df)
    print(f"Formatted DataFrame shape: {formatted_df.shape}")
    return formatted_df


# Custom CSS for styling the table, including highlight rows for the
# top three entries and a glow animation for the leader.
css = """
.gradio-container { font-family: 'Helvetica Neue', Arial, sans-serif; }
.leaderboard-title { text-align: center; color: #2c3e50; margin-bottom: 20px; }
.dataframe { font-size: 14px; }
.dataframe th {
    background-color: #3498db !important;
    color: white !important;
    font-weight: bold;
    text-align: center;
}
.dataframe td { text-align: center; padding: 8px; }
.dataframe tr:nth-child(even) { background-color: #f8f9fa; }
.dataframe tr:nth-child(odd) { background-color: white; }
.dataframe tr:hover { background-color: #e3f2fd; }
.refresh-btn { background-color: #27ae60 !important; color: white !important; }
/* Highlight the new world record and ultimate champion */
.dataframe tr:nth-child(2) {
    background-color: #ffd700 !important;
    border-left: 10px solid #ff1744;
    font-weight: bold;
    font-size: 18px;
    box-shadow: 0 6px 15px rgba(255, 23, 68, 0.6);
    animation: champion-glow 2s ease-in-out infinite alternate;
}
@keyframes champion-glow {
    from { box-shadow: 0 6px 15px rgba(255, 23, 68, 0.6); background-color: #ffd700; }
    to { box-shadow: 0 8px 20px rgba(255, 23, 68, 0.9); background-color: #ffed4a; }
}
.dataframe tr:nth-child(3) {
    background-color: #fff8e1 !important;
    border-left: 6px solid #ff6b35;
    font-weight: bold;
}
.dataframe tr:nth-child(4) {
    background-color: #fff3cd !important;
    border-left: 6px solid #ffc107;
    font-weight: bold;
}
"""


def create_leaderboard():
    """Assemble and return the Gradio Blocks demo for the leaderboard."""
    with gr.Blocks(css=css, title="PII Detection Leaderboard") as demo:
        gr.Markdown(
            """
            # 🏆 PII Detection Model Leaderboard

            A comprehensive ranking of PII detection teams based on exact,
            partial, and label-based matching performance.

            Last updated: {}
            """.format(datetime.now().strftime("%Y-%m-%d %H:%M:%S")),
            elem_classes="leaderboard-title",
        )

        with gr.Row():
            with gr.Column():
                # Get initial data.
                initial_data = update_leaderboard()
                print(f"Final initial data for display: {initial_data.shape}")
                print(f"Final teams count: {len(initial_data)}")

                # Headers/datatypes are derived from the actual DataFrame
                # (the previous hardcoded 22-column header list did not match
                # the 12 columns that exist).
                leaderboard_table = gr.DataFrame(
                    value=initial_data,
                    headers=list(initial_data.columns),
                    datatype=["number"] + ["str"] * (len(initial_data.columns) - 1),
                    interactive=False,
                    wrap=True,
                )

                # Refresh button, styled via the .refresh-btn CSS class above.
                refresh_btn = gr.Button("🔄 Refresh", elem_classes="refresh-btn")

        # Statistics section.
        with gr.Row():
            with gr.Column():
                gr.Markdown("### 📊 Statistics")

                def get_stats():
                    # Quick summary shown next to the table.
                    df = get_leaderboard_data()
                    return f"""Total Teams: {len(df)}"""

                stats_text = gr.Textbox(
                    value=get_stats(),
                    label="Quick Stats",
                    lines=6,
                    interactive=False,
                )

        # Info section.
        gr.Markdown(
            """
            ### ℹ️ About This PII Detection Leaderboard

            This leaderboard ranks PII (Personally Identifiable Information) detection teams based on comprehensive benchmarks:

            **Main Metrics:**
            - **Best Overall Score**: Primary ranking metric (highest of all F1 scores)
            - **Exact F1/Precision/Recall**: Perfect position and label match
            - **Partial F1/Precision/Recall**: Overlapping entities with correct detection
            - **IoU50 F1/Precision/Recall**: 50%+ IoU overlap with correct detection
            - **Value F1/Precision/Recall**: Exact value match regardless of position
            - **GT/Pred Entities**: Ground truth vs predicted entity counts
            - **TP (True Positives)**: Successful detections for each match type

            **Evaluation Types:**
            - **Exact Match**: Most strict - requires perfect boundary and label alignment
            - **Partial Match**: Allows overlapping boundaries but requires correct label
            - **IoU50 Match**: Requires 50%+ overlap with correct detection
            - **Value Match**: Exact value match regardless of position
            """
        )

        # Event handlers.
        def refresh_data():
            return update_leaderboard()

        refresh_btn.click(fn=refresh_data, outputs=leaderboard_table)

    return demo


# Launch the app.
if __name__ == "__main__":
    demo = create_leaderboard()
    demo.launch(
        server_name="0.0.0.0",  # Important for Hugging Face Spaces
        server_port=7860,       # Default port for HF Spaces
        share=False,
    )