Yassine El Kheir committed
Commit 694d1e5 · Parent: a61a0bf
fix leaderboard history

Files changed:
- Leaderboard/app.py (+13, -78)
- Leaderboard/utils.py (+3, -0)
- app.py (+13, -78)
- utils.py (+3, -0)
Leaderboard/app.py
CHANGED
@@ -7,11 +7,11 @@ import numpy as np
 from utils import load_leaderboard, custom_css, load_ground_truth_references, calculate_comprehensive_metrics, parse_submission_csv
 
 # Paths
-
-LEADERBOARD_TSV = …
-# …
-GROUND_TRUTH_CSV = "groud_truth.csv"
-ALLOWED_TEAMS_FILE = "list.txt"
+BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+LEADERBOARD_TSV = os.path.join(BASE_DIR, "leaderboard.tsv")
+# Fixed typo 'groud_truth.csv' -> 'ground_truth.csv'
+GROUND_TRUTH_CSV = os.path.join(BASE_DIR, "ground_truth.csv")
+ALLOWED_TEAMS_FILE = os.path.join(BASE_DIR, "list.txt")
 
 LEADERBOARD_COLUMNS = [
     "Team", "F1-score", "Precision", "Recall", "Correct Rate",
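Why this hunk matters: on a Space, the process's working directory is not guaranteed to be the folder containing app.py, so bare relative paths like "leaderboard.tsv" can silently resolve somewhere else. A minimal sketch of the anchoring pattern, assuming only the standard library (the printed values are illustrative):

import os

# Anchor data files to the directory containing this script,
# independent of wherever the process was launched from.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
LEADERBOARD_TSV = os.path.join(BASE_DIR, "leaderboard.tsv")

# A bare relative path resolves against os.getcwd(), which can differ:
print(os.path.abspath("leaderboard.tsv"))  # depends on the launch directory
print(LEADERBOARD_TSV)                     # always sits next to this file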
@@ -216,12 +216,17 @@ def submit_and_update_leaderboard(team_name, submission_file):
 
         # Logic for saving history and updating file
         # Create history directory if not exists
-        history_dir = "leaderboard_history"
+        history_dir = os.path.join(BASE_DIR, "leaderboard_history")
         os.makedirs(history_dir, exist_ok=True)
 
+        # Create dated subfolder
+        date_str = datetime.now().strftime("%Y-%m-%d")
+        dated_dir = os.path.join(history_dir, date_str)
+        os.makedirs(dated_dir, exist_ok=True)
+
         # Backup current leaderboard
-        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-        backup_path = os.path.join(history_dir, f"leaderboard_{timestamp}.tsv")
+        timestamp = datetime.now().strftime("%H%M%S")
+        backup_path = os.path.join(dated_dir, f"leaderboard_{timestamp}.tsv")
         if os.path.exists(LEADERBOARD_TSV):
             shutil.copy2(LEADERBOARD_TSV, backup_path)
 
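This changes the history layout from one flat folder of leaderboard_YYYYMMDD_HHMMSS.tsv files to a subfolder per day holding leaderboard_HHMMSS.tsv copies. A minimal sketch of the new scheme, assuming only the standard library (the helper name backup_leaderboard is hypothetical):

import os
import shutil
from datetime import datetime

def backup_leaderboard(leaderboard_tsv, history_dir):
    # One folder per day, one timestamped copy per submission,
    # e.g. leaderboard_history/2024-06-01/leaderboard_143052.tsv
    dated_dir = os.path.join(history_dir, datetime.now().strftime("%Y-%m-%d"))
    os.makedirs(dated_dir, exist_ok=True)
    backup_path = os.path.join(dated_dir, f"leaderboard_{datetime.now().strftime('%H%M%S')}.tsv")
    if os.path.exists(leaderboard_tsv):
        shutil.copy2(leaderboard_tsv, backup_path)  # copy2 preserves file metadata
        return backup_path
    return None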
@@ -249,76 +254,6 @@ def submit_and_update_leaderboard(team_name, submission_file):
         import traceback
         error_msg = f"Error during submission: {str(e)}\n{traceback.format_exc()}"
         return error_msg, get_latest_leaderboard()
-
-        if error_message:
-            return error_message, get_latest_leaderboard()
-
-        if metrics is None:
-            return "Error: Metrics could not be calculated.", get_latest_leaderboard()
-
-        # Map or Add 'Correct Rate' if present in metrics (e.g. from Accuracy)
-        if "Accuracy" in metrics:
-            metrics["Correct Rate"] = metrics["Accuracy"]
-
-        new_f1 = metrics.get("F1-score", 0)
-
-        # Check if team already exists
-        if team_name in current_df["Team"].values:
-            existing_row = current_df[current_df["Team"] == team_name]
-            # Use raw value not string representation if possible, assume load_leaderboard parses as float where possible
-            old_f1 = existing_row["F1-score"].values[0] if "F1-score" in existing_row.columns and pd.notna(existing_row["F1-score"].values[0]) else -1.0
-
-            if new_f1 <= old_f1:
-                return f"Submission successful but no improvement. New F1: {new_f1:.4f} <= Old F1: {old_f1:.4f}", get_latest_leaderboard()
-
-            # Improvement found, update existing entry
-            for metric_name, metric_value in metrics.items():
-                if metric_name in current_df.columns:
-                    current_df.loc[current_df["Team"] == team_name, metric_name] = metric_value
-        else:
-            # Add new entry with all metrics
-            new_entry_dict = {"Team": team_name}
-            new_entry_dict.update(metrics)
-
-            # Ensure only desired columns
-            filtered_entry = {k: v for k, v in new_entry_dict.items() if k in LEADERBOARD_COLUMNS}
-            # Fill missing
-            for col in LEADERBOARD_COLUMNS:
-                if col not in filtered_entry:
-                    filtered_entry[col] = None
-
-            new_entry = pd.DataFrame([filtered_entry])
-            current_df = pd.concat([current_df, new_entry], ignore_index=True)
-
-        # Logic for saving history and updating file
-        # Create history directory if not exists
-        history_dir = "leaderboard_history"
-        os.makedirs(history_dir, exist_ok=True)
-
-        # Backup current leaderboard
-        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-        backup_path = os.path.join(history_dir, f"leaderboard_{timestamp}.tsv")
-        if os.path.exists(LEADERBOARD_TSV):
-            shutil.copy2(LEADERBOARD_TSV, backup_path)
-
-        # Enforce columns before sort and save
-        current_df = current_df[LEADERBOARD_COLUMNS]
-
-        # Sort by F1-score (descending)
-        if "F1-score" in current_df.columns:
-            current_df = current_df.sort_values(by="F1-score", ascending=False).reset_index(drop=True)
-
-        current_df.to_csv(LEADERBOARD_TSV, index=False)
-
-        metrics_summary = f"Submission successful! New High Score!\n"
-        for metric_name, metric_value in metrics.items():
-            metrics_summary += f"{metric_name}: {metric_value}\n"
-
-        return metrics_summary, get_latest_leaderboard()
-    except Exception as e:
-        import traceback
-        error_msg = f"Error during submission: {str(e)}\n{traceback.format_exc()}"
-        return error_msg, get_latest_leaderboard()
 
 def create_ui():
     with gr.Blocks(theme=gr.themes.Soft(text_size=gr.themes.sizes.text_md), css=custom_css) as demo:
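Note: the 70 lines deleted here were a stale second copy of the submission flow (error checks, F1 comparison, leaderboard update, backup, sort-and-save) stranded after the except block's return, where they could never execute; removing the dead duplicate changes no behavior.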
Leaderboard/utils.py
CHANGED
@@ -226,6 +226,9 @@ def calculate_comprehensive_metrics(submission_file_obj, references):
     metrics = {}
     error_message = None
 
+    if not references:
+        return None, "Error: Ground truth references could not be loaded. Please ensure 'ground_truth.csv' exists locally or HF_TOKEN is set to access 'IqraEval/QuranMB.v2.labels'."
+
     temp_dir = tempfile.mkdtemp()
     try:
         temp_aligned_dir = os.path.join(temp_dir, "aligned")
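The new guard keeps the function's two-value contract: it returns (metrics, error_message), and callers branch on the error before touching metrics. A hedged sketch of the call site, mirroring how app.py consumes the tuple (the wrapper name handle_submission is illustrative):

from utils import calculate_comprehensive_metrics

def handle_submission(submission_file, references):
    # calculate_comprehensive_metrics returns (metrics, error_message);
    # a non-empty error_message now covers the missing-references case too.
    metrics, error_message = calculate_comprehensive_metrics(submission_file, references)
    if error_message:
        return error_message
    if metrics is None:
        return "Error: Metrics could not be calculated."
    return f"F1-score: {metrics.get('F1-score', 0):.4f}"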
app.py
CHANGED
Same diff as Leaderboard/app.py above (+13, -78).
utils.py
CHANGED
Same diff as Leaderboard/utils.py above (+3, -0).