Spaces:
Sleeping
Sleeping
Update src/leaderboard.py
Browse files- src/leaderboard.py +100 -31
src/leaderboard.py
CHANGED
|
@@ -82,13 +82,27 @@ def load_leaderboard() -> pd.DataFrame:
|
|
| 82 |
else:
|
| 83 |
df[col] = ""
|
| 84 |
|
| 85 |
-
# Ensure proper data types for numeric columns
|
| 86 |
numeric_columns = [
|
| 87 |
col for col in df.columns
|
| 88 |
if any(x in col for x in ["quality", "bleu", "chrf", "ci_", "samples", "pairs"])
|
| 89 |
]
|
|
|
|
| 90 |
for col in numeric_columns:
|
| 91 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
|
| 93 |
print(f"✅ Loaded leaderboard with {len(df)} entries")
|
| 94 |
return df
|
|
@@ -270,31 +284,75 @@ def get_track_leaderboard(
|
|
| 270 |
) -> pd.DataFrame:
|
| 271 |
"""Get leaderboard for a specific track with filtering."""
|
| 272 |
|
|
|
|
|
|
|
| 273 |
if df.empty:
|
|
|
|
| 274 |
return df
|
| 275 |
|
| 276 |
track_quality_col = f"{track}_{metric}"
|
| 277 |
|
| 278 |
# Ensure columns exist
|
| 279 |
if track_quality_col not in df.columns:
|
| 280 |
-
print(f"Warning: Missing column for track {track}")
|
|
|
|
| 281 |
return pd.DataFrame()
|
| 282 |
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 298 |
|
| 299 |
|
| 300 |
def prepare_leaderboard_display(df: pd.DataFrame, track: str) -> pd.DataFrame:
|
|
@@ -320,20 +378,31 @@ def prepare_leaderboard_display(df: pd.DataFrame, track: str) -> pd.DataFrame:
|
|
| 320 |
available_columns = [col for col in base_columns + track_columns if col in df.columns]
|
| 321 |
display_df = df[available_columns].copy()
|
| 322 |
|
| 323 |
-
# Format numeric columns
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
|
| 329 |
-
|
| 330 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 331 |
|
| 332 |
-
|
| 333 |
-
|
| 334 |
-
display_df[col] = display_df[col].apply(
|
| 335 |
-
lambda x: fmt.format(float(x)) if pd.notnull(x) else "0.0000"
|
| 336 |
-
)
|
| 337 |
|
| 338 |
# Format confidence intervals
|
| 339 |
if f"{track}_ci_lower" in display_df.columns and f"{track}_ci_upper" in display_df.columns:
|
|
|
|
| 82 |
else:
|
| 83 |
df[col] = ""
|
| 84 |
|
| 85 |
+
# Ensure proper data types for numeric columns with robust conversion
|
| 86 |
numeric_columns = [
|
| 87 |
col for col in df.columns
|
| 88 |
if any(x in col for x in ["quality", "bleu", "chrf", "ci_", "samples", "pairs"])
|
| 89 |
]
|
| 90 |
+
|
| 91 |
for col in numeric_columns:
|
| 92 |
+
try:
|
| 93 |
+
# Convert to numeric, coercing errors to NaN, then fill NaN with 0
|
| 94 |
+
df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0.0)
|
| 95 |
+
# Ensure it's float type for consistency
|
| 96 |
+
df[col] = df[col].astype(float)
|
| 97 |
+
except Exception as e:
|
| 98 |
+
print(f"Warning: Could not convert column {col} to numeric: {e}")
|
| 99 |
+
df[col] = 0.0
|
| 100 |
+
|
| 101 |
+
# Ensure string columns are properly typed
|
| 102 |
+
string_columns = ["model_name", "author", "model_category", "description", "submission_date", "evaluation_date"]
|
| 103 |
+
for col in string_columns:
|
| 104 |
+
if col in df.columns:
|
| 105 |
+
df[col] = df[col].fillna("").astype(str)
|
| 106 |
|
| 107 |
print(f"✅ Loaded leaderboard with {len(df)} entries")
|
| 108 |
return df
|
|
|
|
| 284 |
) -> pd.DataFrame:
|
| 285 |
"""Get leaderboard for a specific track with filtering."""
|
| 286 |
|
| 287 |
+
print(f"Getting track leaderboard for {track}, input df has {len(df)} rows")
|
| 288 |
+
|
| 289 |
if df.empty:
|
| 290 |
+
print("Input DataFrame is empty")
|
| 291 |
return df
|
| 292 |
|
| 293 |
track_quality_col = f"{track}_{metric}"
|
| 294 |
|
| 295 |
# Ensure columns exist
|
| 296 |
if track_quality_col not in df.columns:
|
| 297 |
+
print(f"Warning: Missing column {track_quality_col} for track {track}")
|
| 298 |
+
print(f"Available columns: {list(df.columns)}")
|
| 299 |
return pd.DataFrame()
|
| 300 |
|
| 301 |
+
try:
|
| 302 |
+
# Make a copy to avoid modifying original
|
| 303 |
+
df_filtered = df.copy()
|
| 304 |
+
print(f"Created copy with {len(df_filtered)} rows")
|
| 305 |
+
|
| 306 |
+
# Filter by category
|
| 307 |
+
if category_filter != "all":
|
| 308 |
+
original_count = len(df_filtered)
|
| 309 |
+
df_filtered = df_filtered[df_filtered["model_category"] == category_filter]
|
| 310 |
+
print(f"After category filter '{category_filter}': {len(df_filtered)} rows (was {original_count})")
|
| 311 |
+
|
| 312 |
+
# Ensure numeric columns are properly typed
|
| 313 |
+
numeric_columns = [
|
| 314 |
+
f"{track}_quality", f"{track}_bleu", f"{track}_chrf",
|
| 315 |
+
f"{track}_ci_lower", f"{track}_ci_upper",
|
| 316 |
+
f"{track}_samples", f"{track}_pairs"
|
| 317 |
+
]
|
| 318 |
+
|
| 319 |
+
print(f"Converting numeric columns: {[col for col in numeric_columns if col in df_filtered.columns]}")
|
| 320 |
+
|
| 321 |
+
for col in numeric_columns:
|
| 322 |
+
if col in df_filtered.columns:
|
| 323 |
+
try:
|
| 324 |
+
# Check original data type
|
| 325 |
+
print(f"Column {col} dtype: {df_filtered[col].dtype}, sample values: {df_filtered[col].head(3).tolist()}")
|
| 326 |
+
|
| 327 |
+
# Convert to numeric
|
| 328 |
+
df_filtered[col] = pd.to_numeric(df_filtered[col], errors='coerce').fillna(0.0)
|
| 329 |
+
print(f"Column {col} converted successfully")
|
| 330 |
+
except Exception as e:
|
| 331 |
+
print(f"Error converting column {col}: {e}")
|
| 332 |
+
df_filtered[col] = 0.0
|
| 333 |
+
|
| 334 |
+
# Filter to models that have this track
|
| 335 |
+
original_count = len(df_filtered)
|
| 336 |
+
quality_mask = df_filtered[track_quality_col] > 0
|
| 337 |
+
df_filtered = df_filtered[quality_mask]
|
| 338 |
+
print(f"After quality filter (>{track_quality_col} > 0): {len(df_filtered)} rows (was {original_count})")
|
| 339 |
+
|
| 340 |
+
if df_filtered.empty:
|
| 341 |
+
print("No models found with quality > 0 for this track")
|
| 342 |
+
return df_filtered
|
| 343 |
+
|
| 344 |
+
# Sort by track-specific metric
|
| 345 |
+
print(f"Sorting by {track_quality_col}")
|
| 346 |
+
df_filtered = df_filtered.sort_values(track_quality_col, ascending=False).reset_index(drop=True)
|
| 347 |
+
print(f"Sorted successfully, final result has {len(df_filtered)} rows")
|
| 348 |
+
|
| 349 |
+
return df_filtered
|
| 350 |
+
|
| 351 |
+
except Exception as e:
|
| 352 |
+
print(f"Error in get_track_leaderboard: {e}")
|
| 353 |
+
import traceback
|
| 354 |
+
traceback.print_exc()
|
| 355 |
+
return pd.DataFrame()
|
| 356 |
|
| 357 |
|
| 358 |
def prepare_leaderboard_display(df: pd.DataFrame, track: str) -> pd.DataFrame:
|
|
|
|
| 378 |
available_columns = [col for col in base_columns + track_columns if col in df.columns]
|
| 379 |
display_df = df[available_columns].copy()
|
| 380 |
|
| 381 |
+
# Format numeric columns safely
|
| 382 |
+
def safe_format(value, precision=4):
    """Format ``value`` as a fixed-point string with ``precision`` decimals.

    Args:
        value: The value to format; anything ``float()`` accepts.
        precision: Number of digits after the decimal point (default 4).

    Returns:
        The formatted string. Missing values (as reported by ``pd.isna``)
        and values that cannot be converted to ``float`` are rendered as
        zero with the same number of decimals.
    """
    # Derive the fallback from ``precision`` instead of hard-coding the
    # 4- and 2-decimal cases, so every precision gets a matching zero string.
    zero = f"{0.0:.{precision}f}"
    try:
        # pd.isna already reports None as missing, so no separate None check.
        if pd.isna(value):
            return zero
        return f"{float(value):.{precision}f}"
    except (ValueError, TypeError):
        # float() raises ValueError for non-numeric strings and TypeError
        # for unsupported types; both fall back to the zero string.
        return zero
|
| 390 |
+
|
| 391 |
+
# Apply formatting to numeric columns
|
| 392 |
+
if f"{track}_quality" in display_df.columns:
|
| 393 |
+
display_df[f"{track}_quality"] = display_df[f"{track}_quality"].apply(lambda x: safe_format(x, 4))
|
| 394 |
+
|
| 395 |
+
if f"{track}_bleu" in display_df.columns:
|
| 396 |
+
display_df[f"{track}_bleu"] = display_df[f"{track}_bleu"].apply(lambda x: safe_format(x, 2))
|
| 397 |
+
|
| 398 |
+
if f"{track}_chrf" in display_df.columns:
|
| 399 |
+
display_df[f"{track}_chrf"] = display_df[f"{track}_chrf"].apply(lambda x: safe_format(x, 4))
|
| 400 |
+
|
| 401 |
+
if f"{track}_ci_lower" in display_df.columns:
|
| 402 |
+
display_df[f"{track}_ci_lower"] = display_df[f"{track}_ci_lower"].apply(lambda x: safe_format(x, 4))
|
| 403 |
|
| 404 |
+
if f"{track}_ci_upper" in display_df.columns:
|
| 405 |
+
display_df[f"{track}_ci_upper"] = display_df[f"{track}_ci_upper"].apply(lambda x: safe_format(x, 4))
|
|
|
|
|
|
|
|
|
|
| 406 |
|
| 407 |
# Format confidence intervals
|
| 408 |
if f"{track}_ci_lower" in display_df.columns and f"{track}_ci_upper" in display_df.columns:
|