Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -98,6 +98,14 @@ def initialize_data():
|
|
| 98 |
print("π Loading leaderboard...")
|
| 99 |
current_leaderboard = load_leaderboard()
|
| 100 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 101 |
print(f"✅ Initialization complete!")
|
| 102 |
print(f" - Test set: {len(public_test_set):,} samples")
|
| 103 |
print(f" - Current models: {len(current_leaderboard)}")
|
|
@@ -106,6 +114,7 @@ def initialize_data():
|
|
| 106 |
|
| 107 |
except Exception as e:
|
| 108 |
print(f"❌ Initialization failed: {e}")
|
|
|
|
| 109 |
traceback.print_exc()
|
| 110 |
return False
|
| 111 |
|
|
@@ -263,42 +272,102 @@ def evaluate_submission(
|
|
| 263 |
def refresh_track_leaderboard(track: str, search_query: str = "", category_filter: str = "all") -> Tuple[pd.DataFrame, object, object, str]:
|
| 264 |
"""Refresh leaderboard for a specific track with filters."""
|
| 265 |
try:
|
|
|
|
|
|
|
| 266 |
global current_leaderboard
|
| 267 |
if current_leaderboard is None:
|
|
|
|
| 268 |
current_leaderboard = load_leaderboard()
|
| 269 |
|
| 270 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 271 |
|
| 272 |
# Apply search filter
|
| 273 |
if search_query and not track_leaderboard.empty:
|
| 274 |
-
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 287 |
### π {track_config['name']} Statistics
|
| 288 |
|
| 289 |
- **Total Models**: {len(track_leaderboard)}
|
| 290 |
-
- **Best Model**: {
|
| 291 |
-
- **Best Score**: {
|
| 292 |
|
| 293 |
### 🔬 Track Information:
|
| 294 |
-
{track_config
|
| 295 |
-
|
| 296 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 297 |
return display_df, ranking_plot, comparison_plot, stats_text
|
| 298 |
|
| 299 |
except Exception as e:
|
| 300 |
error_msg = f"Error loading {track} leaderboard: {str(e)}"
|
| 301 |
-
print(error_msg)
|
|
|
|
|
|
|
| 302 |
return pd.DataFrame(), None, None, error_msg
|
| 303 |
|
| 304 |
def get_language_pair_comparison(track: str) -> Tuple[pd.DataFrame, object]:
|
|
@@ -726,10 +795,33 @@ with gr.Blocks(
|
|
| 726 |
outputs=[pairs_table, pairs_comparison_plot]
|
| 727 |
)
|
| 728 |
|
| 729 |
-
# Load initial data
|
| 730 |
def load_initial_data():
|
| 731 |
-
|
| 732 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 733 |
|
| 734 |
demo.load(
|
| 735 |
fn=load_initial_data,
|
|
|
|
| 98 |
print("π Loading leaderboard...")
|
| 99 |
current_leaderboard = load_leaderboard()
|
| 100 |
|
| 101 |
+
# Debug leaderboard content
|
| 102 |
+
print(f"Leaderboard loaded with {len(current_leaderboard)} entries")
|
| 103 |
+
if not current_leaderboard.empty:
|
| 104 |
+
print(f"Leaderboard columns: {list(current_leaderboard.columns)}")
|
| 105 |
+
print(f"Sample row types: {current_leaderboard.dtypes.to_dict()}")
|
| 106 |
+
else:
|
| 107 |
+
print("Leaderboard is empty - will show empty interface")
|
| 108 |
+
|
| 109 |
print(f"✅ Initialization complete!")
|
| 110 |
print(f" - Test set: {len(public_test_set):,} samples")
|
| 111 |
print(f" - Current models: {len(current_leaderboard)}")
|
|
|
|
| 114 |
|
| 115 |
except Exception as e:
|
| 116 |
print(f"❌ Initialization failed: {e}")
|
| 117 |
+
import traceback
|
| 118 |
traceback.print_exc()
|
| 119 |
return False
|
| 120 |
|
|
|
|
| 272 |
def _search_track_leaderboard(track_leaderboard: pd.DataFrame, search_query: str) -> pd.DataFrame:
    """Return the rows whose model name or author contains *search_query*.

    The comparison is a case-insensitive, literal substring match; rows with
    a missing name/author never match.

    Raises:
        KeyError: if the 'model_name' or 'author' column is absent.
        AttributeError: if either column does not hold string data.
    """
    query_lower = search_query.lower()
    # regex=False: the query comes straight from a search box, so characters
    # such as "+", "(" or "[" must be matched literally instead of being
    # compiled as a regular expression (which can raise on malformed input).
    mask = (
        track_leaderboard['model_name'].str.lower().str.contains(query_lower, na=False, regex=False)
        | track_leaderboard['author'].str.lower().str.contains(query_lower, na=False, regex=False)
    )
    return track_leaderboard[mask]


def _best_track_entry(track_leaderboard: pd.DataFrame, track: str) -> Tuple[str, float]:
    """Return ``(model_name, quality_score)`` taken from the first row.

    Falls back to ``("None", 0.0)`` for an empty frame, and to a score of
    ``0.0`` when the ``f"{track}_quality"`` column is absent, null, or not
    convertible to ``float``.
    """
    if track_leaderboard.empty:
        return "None", 0.0

    # NOTE(review): assumes the first row is the top-ranked model, i.e. that
    # get_track_leaderboard() returns rows already sorted - confirm.
    top_row = track_leaderboard.iloc[0]
    best_model = str(top_row['model_name'])
    best_score = 0.0

    quality_col = f'{track}_quality'
    if quality_col in track_leaderboard.columns:
        try:
            score_val = top_row[quality_col]
            best_score = float(score_val) if pd.notnull(score_val) else 0.0
        except (ValueError, TypeError):
            best_score = 0.0

    return best_model, best_score


def refresh_track_leaderboard(track: str, search_query: str = "", category_filter: str = "all") -> Tuple[pd.DataFrame, object, object, str]:
    """Refresh leaderboard for a specific track with filters.

    Each stage (track selection, search filter, display table, plots, stats)
    is isolated in its own ``try`` so that one failing stage degrades to an
    empty/``None`` placeholder instead of blanking the whole view. Progress
    and errors are reported with ``print``.

    Args:
        track: Key into ``EVALUATION_TRACKS``; also used to build the
            ``f"{track}_quality"`` column name.
        search_query: Case-insensitive literal substring matched against the
            'model_name' and 'author' columns. Empty string disables the filter.
        category_filter: Forwarded unchanged to ``get_track_leaderboard``.

    Returns:
        ``(display_df, ranking_plot, comparison_plot, stats_text)``. On an
        unexpected failure outside the per-stage handlers the result is
        ``(pd.DataFrame(), None, None, error_message)``.
    """
    global current_leaderboard

    try:
        print(f"Refreshing {track} leaderboard...")

        if current_leaderboard is None:
            print("Loading leaderboard...")
            current_leaderboard = load_leaderboard()

        print(f"Leaderboard loaded with {len(current_leaderboard)} entries")

        # Get track leaderboard with robust error handling
        try:
            print(f"Getting track leaderboard for {track}...")
            track_leaderboard = get_track_leaderboard(current_leaderboard, track, category_filter=category_filter)
            print(f"Track leaderboard has {len(track_leaderboard)} entries")
        except Exception as e:
            print(f"Error getting track leaderboard: {e}")
            track_leaderboard = pd.DataFrame()

        # Apply search filter; on failure the unfiltered rows are kept.
        if search_query and not track_leaderboard.empty:
            try:
                print(f"Applying search filter: {search_query}")
                track_leaderboard = _search_track_leaderboard(track_leaderboard, search_query)
                print(f"After search filter: {len(track_leaderboard)} entries")
            except Exception as e:
                print(f"Error applying search filter: {e}")

        # Prepare display with error handling
        try:
            print("Preparing display...")
            display_df = prepare_leaderboard_display(track_leaderboard, track)
            print(f"Display prepared with {len(display_df)} rows")
        except Exception as e:
            print(f"Error preparing display: {e}")
            display_df = pd.DataFrame()

        # Create plots with error handling
        try:
            print("Creating ranking plot...")
            ranking_plot = create_leaderboard_plot(track_leaderboard, track)
        except Exception as e:
            print(f"Error creating ranking plot: {e}")
            ranking_plot = None

        try:
            print("Creating comparison plot...")
            comparison_plot = create_performance_comparison_plot(track_leaderboard, track)
        except Exception as e:
            print(f"Error creating comparison plot: {e}")
            comparison_plot = None

        # Generate stats text with safe formatting
        try:
            print("Generating stats...")
            track_config = EVALUATION_TRACKS[track]
            best_model, best_score = _best_track_entry(track_leaderboard, track)

            stats_text = f"""
### 📊 {track_config['name']} Statistics

- **Total Models**: {len(track_leaderboard)}
- **Best Model**: {best_model}
- **Best Score**: {best_score:.4f}

### 🔬 Track Information:
{track_config.get('description', 'No description available')}
"""
            print("Stats generated successfully")
        except Exception as e:
            print(f"Error generating stats: {e}")
            stats_text = f"Error loading {track} statistics: {str(e)}"

        print("Track refresh completed successfully")
        return display_df, ranking_plot, comparison_plot, stats_text

    except Exception as e:
        error_msg = f"Error loading {track} leaderboard: {str(e)}"
        print(f"MAIN ERROR: {error_msg}")
        import traceback
        traceback.print_exc()
        return pd.DataFrame(), None, None, error_msg
|
| 372 |
|
| 373 |
def get_language_pair_comparison(track: str) -> Tuple[pd.DataFrame, object]:
|
|
|
|
| 795 |
outputs=[pairs_table, pairs_comparison_plot]
|
| 796 |
)
|
| 797 |
|
| 798 |
+
# Load initial data and update dropdowns
|
| 799 |
def load_initial_data() -> Tuple[pd.DataFrame, object, object, str]:
    """Build the initial leaderboard view for the "google_comparable" track.

    Loads the module-level ``current_leaderboard`` if it is still ``None``,
    then delegates to ``refresh_track_leaderboard`` with no search query and
    the "all" category.

    Returns:
        The 4-tuple produced by ``refresh_track_leaderboard``. If that call
        raises, ``(empty DataFrame, None, None, "No data available")``; if
        anything earlier fails, ``(empty DataFrame, None, None,
        "Error loading data")``. The function never raises, so the page load
        callback always receives values for all four outputs.
    """
    global current_leaderboard
    import traceback

    try:
        print("Loading initial data...")

        # Make sure we have a leaderboard
        if current_leaderboard is None:
            current_leaderboard = load_leaderboard()

        print(f"Current leaderboard has {len(current_leaderboard)} entries")

        # Try to load Google track data
        try:
            google_data = refresh_track_leaderboard("google_comparable", "", "all")
            print("Successfully loaded Google track data")
            return google_data
        except Exception as e:
            print(f"Error loading Google track: {e}")
            # Keep the full traceback in the logs; the one-line message alone
            # is not enough to locate the failing call.
            traceback.print_exc()
            # Return empty data if there's an error
            return (pd.DataFrame(), None, None, "No data available")

    except Exception as e:
        print(f"Error in load_initial_data: {e}")
        traceback.print_exc()
        return (pd.DataFrame(), None, None, "Error loading data")
|
| 825 |
|
| 826 |
demo.load(
|
| 827 |
fn=load_initial_data,
|