# Hugging Face Space app.py — PII Detection leaderboard (upstream commit 6694cbb, author: muhyzater)
# Entity Counts
import gradio as gr
import pandas as pd
import numpy as np
from datetime import datetime
def get_leaderboard_data():
    """Build the PII Detection leaderboard as a pandas DataFrame.

    The numbers are hard-coded evaluation results (45 ranked submissions from
    14 teams). Columns: Rank, Team, Best Overall Score, and F1/Precision/Recall
    for the Exact, Partial, and IoU50 match criteria.

    Returns:
        pd.DataFrame: one row per ranked submission, 12 columns.
    """
    data = {
        'Rank': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45],
        'Team': [
            'Premise (submission 3)',
            'Premise (submission 2)',
            'صقور الأرض (submission 9)',
            'Premise',
            'Sebaweeh (submission 11)',
            'صقور الأرض (submission 8)',
            'صقور الأرض (submission 5)',
            'TheConsultants (submission 3)',
            'Dynamic (submission 4)',
            'صقور الأرض (submission 4)',
            'صقور الأرض (submission 3)',
            'Dynamic (submission 3)',
            'The LADS (submission 5)',
            'Sebaweeh (submission 10)',
            'Dynamic (submission 2)',
            'Prophytech-AI (submission 2)',  # New with 0.5341
            'صقور الأرض (submission 1)',
            'Sebaweeh (submission 9)',
            'ByFi (submission 3)',
            'Gang of Four',  # New with 0.5153
            'The LADS (submission 4)',
            'صقور الأرض (submission 2)',
            'Nutoq',  # New with 0.5072
            'Sebaweeh (submission 8)',
            'Dynamic',
            'ByFi',
            'ByFi (submission 2)',
            'TheConsultants',
            'Sebaweeh (submission 7)',
            'Prophytech-AI',
            'The LADS (submission 3)',
            'TheConsultants (submission 2)',
            'SaRA (submission 2)',
            'The LADS (submission 2)',
            'Sebaweeh (submission 6)',
            'Sebaweeh (submission 4)',
            'Why Not',
            'Sebaweeh (submission 5)',
            'The LADS',
            'AEye',
            'Sebaweeh (submission 3)',
            'NICE',
            'SaRA (submission 1)',
            'Sebaweeh (submission 2)',
            'Sebaweeh (submission 1)'
        ],
        # Main Score (Best Overall Score from the evaluation results)
        'Best Overall Score': [0.6015, 0.5996, 0.5973, 0.5973, 0.5782, 0.5726, 0.5705, 0.5575, 0.5522, 0.5506, 0.5394, 0.5411, 0.5359, 0.5358, 0.5344, 0.5341, 0.5333, 0.5225, 0.5165, 0.5153, 0.5103, 0.5089, 0.5072, 0.5053, 0.5040, 0.5012, 0.4996, 0.4986, 0.4945, 0.4938, 0.4892, 0.4817, 0.4406, 0.4145, 0.4095, 0.3938, 0.3845, 0.3519, 0.3346, 0.3180, 0.2846, 0.2667, 0.2633, 0.2630, 0.2457],
        # Exact Match Metrics (Macro)
        'Exact F1': [0.0142, 0.0143, 0.0154, 0.0143, 0.0298, 0.0244, 0.0188, 0.0298, 0.0298, 0.0244, 0.0188, 0.0256, 0.0106, 0.0241, 0.0239, 0.0239, 0.0237, 0.0185, 0.0169, 0.0133, 0.0101, 0.0171, 0.0094, 0.0098, 0.0179, 0.0161, 0.0161, 0.0145, 0.0104, 0.0181, 0.0089, 0.0132, 0.0113, 0.0079, 0.0096, 0.0075, 0.0088, 0.0076, 0.0077, 0.0081, 0.0053, 0.0039, 0.0058, 0.0053, 0.0021],
        'Exact Precision': [0.015, 0.015, 0.016, 0.015, 0.029, 0.029, 0.023, 0.029, 0.029, 0.029, 0.023, 0.029, 0.011, 0.029, 0.029, 0.029, 0.029, 0.023, 0.022, 0.016, 0.013, 0.021, 0.012, 0.014, 0.020, 0.021, 0.021, 0.018, 0.016, 0.020, 0.012, 0.018, 0.015, 0.011, 0.011, 0.009, 0.009, 0.006, 0.015, 0.013, 0.004, 0.003, 0.005, 0.004, 0.001],
        'Exact Recall': [0.013, 0.013, 0.015, 0.014, 0.021, 0.021, 0.016, 0.021, 0.021, 0.021, 0.016, 0.020, 0.010, 0.021, 0.020, 0.020, 0.020, 0.016, 0.014, 0.011, 0.008, 0.015, 0.008, 0.007, 0.016, 0.013, 0.013, 0.012, 0.008, 0.017, 0.007, 0.010, 0.009, 0.006, 0.009, 0.007, 0.008, 0.010, 0.005, 0.006, 0.011, 0.005, 0.008, 0.010, 0.007],
        # Partial Match Metrics (Macro)
        'Partial F1': [0.6015, 0.5996, 0.5973, 0.5973, 0.5782, 0.5726, 0.5705, 0.5575, 0.5522, 0.5506, 0.5394, 0.5411, 0.5359, 0.5358, 0.5344, 0.5341, 0.5333, 0.5225, 0.5165, 0.5153, 0.5103, 0.5089, 0.5072, 0.5053, 0.5040, 0.5012, 0.4996, 0.4986, 0.4945, 0.4938, 0.4892, 0.4817, 0.4406, 0.4145, 0.4095, 0.3938, 0.3845, 0.3519, 0.3346, 0.3180, 0.2846, 0.2667, 0.2633, 0.2630, 0.2457],
        'Partial Precision': [0.647, 0.642, 0.634, 0.637, 0.659, 0.457, 0.655, 0.659, 0.659, 0.657, 0.646, 0.647, 0.445, 0.636, 0.647, 0.647, 0.644, 0.630, 0.655, 0.622, 0.669, 0.610, 0.669, 0.740, 0.560, 0.662, 0.659, 0.634, 0.740, 0.536, 0.669, 0.670, 0.596, 0.590, 0.456, 0.458, 0.398, 0.280, 0.649, 0.494, 0.190, 0.231, 0.204, 0.179, 0.143],
        'Partial Recall': [0.562, 0.562, 0.565, 0.562, 0.461, 0.495, 0.491, 0.461, 0.461, 0.488, 0.463, 0.461, 0.408, 0.463, 0.455, 0.455, 0.455, 0.410, 0.413, 0.440, 0.419, 0.436, 0.408, 0.384, 0.458, 0.403, 0.402, 0.411, 0.371, 0.457, 0.385, 0.376, 0.350, 0.319, 0.372, 0.346, 0.372, 0.474, 0.225, 0.234, 0.569, 0.316, 0.370, 0.495, 0.854],
        # IoU 50% Metrics (Macro)
        'IoU50 F1': [0.2518, 0.2557, 0.2571, 0.2543, 0.2584, 0.1867, 0.1867, 0.2684, 0.2584, 0.2461, 0.2414, 0.2474, 0.2220, 0.2431, 0.2439, 0.2439, 0.2434, 0.2142, 0.2162, 0.2141, 0.2070, 0.2289, 0.2252, 0.1759, 0.2088, 0.2170, 0.2165, 0.2118, 0.1717, 0.1992, 0.2100, 0.2071, 0.1807, 0.1676, 0.1539, 0.1490, 0.1444, 0.1409, 0.1244, 0.1058, 0.1099, 0.0646, 0.0733, 0.1012, 0.0871],
        'IoU50 Precision': [0.271, 0.274, 0.273, 0.271, 0.298, 0.189, 0.187, 0.298, 0.298, 0.291, 0.289, 0.298, 0.159, 0.289, 0.295, 0.295, 0.294, 0.264, 0.280, 0.258, 0.276, 0.275, 0.297, 0.258, 0.232, 0.287, 0.286, 0.269, 0.257, 0.216, 0.287, 0.288, 0.244, 0.239, 0.171, 0.173, 0.149, 0.112, 0.241, 0.164, 0.073, 0.056, 0.057, 0.069, 0.051],
        'IoU50 Recall': [0.235, 0.240, 0.243, 0.239, 0.218, 0.194, 0.192, 0.218, 0.218, 0.213, 0.207, 0.218, 0.146, 0.210, 0.208, 0.208, 0.208, 0.180, 0.176, 0.183, 0.166, 0.196, 0.181, 0.134, 0.190, 0.175, 0.174, 0.174, 0.129, 0.185, 0.165, 0.162, 0.143, 0.129, 0.140, 0.131, 0.140, 0.190, 0.084, 0.078, 0.220, 0.077, 0.103, 0.190, 0.303],
    }
    # Sanity check: every column must have exactly one value per ranked entry.
    # BUG FIX: the expected length was hard-coded to 42, which no longer
    # matches the data (45 entries) — derive it from the Rank column instead.
    expected = len(data['Rank'])
    for key, values in data.items():
        if len(values) != expected:
            print(f"ERROR: {key} has {len(values)} values, expected {expected}")
        else:
            print(f"✓ {key}: {len(values)} values")
    # Debug output to verify the assembled frame.
    df = pd.DataFrame(data)
    print(f"DataFrame shape: {df.shape}")
    print(f"Number of teams: {len(df)}")
    # BUG FIX: the old debug line claimed 'Sebaweeh (submission 9)' sat at
    # rank 3, but row index 2 actually holds 'صقور الأرض (submission 9)'.
    print(f"Top-ranked team: {df.iloc[0]['Team']}")
    print(f"Teams: {df['Team'].tolist()}")
    return df
def format_leaderboard(df):
    """Return a display-ready copy of *df* with string-formatted numbers.

    Score columns are rendered with four decimal places; entity-count
    columns get thousands separators. Columns absent from *df* are
    skipped, and the input frame is never modified.
    """
    # Known score columns (some may not exist in the current data).
    score_cols = ('Best Overall Score', 'Exact F1', 'Exact Precision', 'Exact Recall',
                  'Partial F1', 'Partial Precision', 'Partial Recall',
                  'IoU50 F1', 'IoU50 Precision', 'IoU50 Recall',
                  'Value F1', 'Value Precision', 'Value Recall')
    # Known entity-count columns (likewise optional).
    count_cols = ('GT Entities', 'Pred Entities', 'TP Exact', 'TP Partial',
                  'TP IoU50', 'TP Value')

    out = df.copy()  # never mutate the caller's frame
    for name in score_cols:
        if name in out.columns:
            out[name] = out[name].apply(lambda v: f"{v:.4f}")
    for name in count_cols:
        if name in out.columns:
            out[name] = out[name].apply(lambda v: f"{v:,}")
    return out
def update_leaderboard():
    """Fetch the latest leaderboard data and return it formatted for display."""
    formatted = format_leaderboard(get_leaderboard_data())
    print(f"Formatted DataFrame shape: {formatted.shape}")
    return formatted
# Custom CSS for styling the Gradio app: table colors, zebra striping, and
# highlight rules for the top three rows.
# NOTE(review): the nth-child(2..4) highlight rules assume the table's first
# rendered child is the header row (so nth-child(2) is rank 1) — confirm
# against the DOM that gr.DataFrame actually emits.
css = """
.gradio-container {
font-family: 'Helvetica Neue', Arial, sans-serif;
}
.leaderboard-title {
text-align: center;
color: #2c3e50;
margin-bottom: 20px;
}
.dataframe {
font-size: 14px;
}
.dataframe th {
background-color: #3498db !important;
color: white !important;
font-weight: bold;
text-align: center;
}
.dataframe td {
text-align: center;
padding: 8px;
}
.dataframe tr:nth-child(even) {
background-color: #f8f9fa;
}
.dataframe tr:nth-child(odd) {
background-color: white;
}
.dataframe tr:hover {
background-color: #e3f2fd;
}
.refresh-btn {
background-color: #27ae60 !important;
color: white !important;
}
/* Highlight the new world record and ultimate champion */
.dataframe tr:nth-child(2) {
background-color: #ffd700 !important;
border-left: 10px solid #ff1744;
font-weight: bold;
font-size: 18px;
box-shadow: 0 6px 15px rgba(255, 23, 68, 0.6);
animation: champion-glow 2s ease-in-out infinite alternate;
}
@keyframes champion-glow {
from {
box-shadow: 0 6px 15px rgba(255, 23, 68, 0.6);
background-color: #ffd700;
}
to {
box-shadow: 0 8px 20px rgba(255, 23, 68, 0.9);
background-color: #ffed4a;
}
}
.dataframe tr:nth-child(3) {
background-color: #fff8e1 !important;
border-left: 6px solid #ff6b35;
font-weight: bold;
}
.dataframe tr:nth-child(4) {
background-color: #fff3cd !important;
border-left: 6px solid #ffc107;
font-weight: bold;
}
"""
# Create the Gradio interface
def create_leaderboard():
    """Build and return the Gradio Blocks UI for the PII detection leaderboard.

    Returns:
        gr.Blocks: the assembled (not yet launched) interface.
    """
    with gr.Blocks(css=css, title="PII Detection Leaderboard") as demo:
        # Title banner; the timestamp is baked in at build time.
        gr.Markdown(
            """
# 🏆 PII Detection Model Leaderboard
A comprehensive ranking of PII detection teams based on exact, partial, and label-based matching performance.
Last updated: {}
""".format(datetime.now().strftime("%Y-%m-%d %H:%M:%S")),
            elem_classes="leaderboard-title"
        )
        with gr.Row():
            with gr.Column():
                # Snapshot of the formatted leaderboard, taken at build time.
                initial_data = update_leaderboard()
                print(f"Final initial data for display: {initial_data.shape}")
                print(f"Final teams count: {len(initial_data)}")
                leaderboard_table = gr.DataFrame(
                    value=initial_data,
                    # BUG FIX: headers/datatype previously hard-coded 22 columns
                    # (including Value/GT/TP fields that the data never contains)
                    # while the frame only has 12. Derive both from the actual
                    # frame so they always agree with the data.
                    headers=list(initial_data.columns),
                    # Rank is numeric; Team and every formatted score are strings.
                    datatype=["number", "str"] + ["str"] * (len(initial_data.columns) - 2),
                    interactive=False,
                    wrap=True
                )
        # Statistics section
        with gr.Row():
            with gr.Column():
                gr.Markdown("### 📊 Statistics")

                def get_stats():
                    # Recompute from source data so the count tracks the frame.
                    df = get_leaderboard_data()
                    return f"""Total Teams: {len(df)}"""

                stats_text = gr.Textbox(
                    value=get_stats(),
                    label="Quick Stats",
                    lines=6,
                    interactive=False
                )
        # Info section (static explanatory text).
        gr.Markdown(
            """
### ℹ️ About This PII Detection Leaderboard
This leaderboard ranks PII (Personally Identifiable Information) detection teams based on comprehensive benchmarks:
**Main Metrics:**
- **Best Overall Score**: Primary ranking metric (highest of all F1 scores)
- **Exact F1/Precision/Recall**: Perfect position and label match
- **Partial F1/Precision/Recall**: Overlapping entities with correct detection
- **IoU50 F1/Precision/Recall**: 50%+ IoU overlap with correct detection
- **Value F1/Precision/Recall**: Exact value match regardless of position
- **GT/Pred Entities**: Ground truth vs predicted entity counts
- **TP (True Positives)**: Successful detections for each match type
**Evaluation Types:**
- **Exact Match**: Most strict - requires perfect boundary and label alignment
- **Partial Match**: Allows overlapping boundaries but requires correct label
- **IoU50 Match**: Requires 50%+ overlap with correct detection
- **Value Match**: Exact value match regardless of position
"""
        )
        # NOTE: the previous revision defined a refresh_data() handler here
        # that was never wired to any component; the dead code was removed.
    return demo
# Launch the app when run as a script (not on import).
if __name__ == "__main__":
    demo = create_leaderboard()
    demo.launch(
        server_name="0.0.0.0",  # bind all interfaces — required on Hugging Face Spaces
        server_port=7860,  # the port HF Spaces expects the app to listen on
        share=False  # no public tunnel; Spaces provides its own URL
    )