Spaces:
Running
Running
Commit
·
eda3ee4
1
Parent(s):
b2379f3
show best model per user
Browse files
app.py
CHANGED
|
@@ -114,12 +114,32 @@ def save_leaderboard(data: list):
|
|
| 114 |
|
| 115 |
|
| 116 |
def get_available_models() -> list:
|
| 117 |
-
"""Fetch available models from the organization."""
|
| 118 |
try:
|
| 119 |
from huggingface_hub import list_models
|
| 120 |
|
| 121 |
-
models
|
| 122 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 123 |
except Exception as e:
|
| 124 |
print(f"Error fetching models: {e}")
|
| 125 |
return ["No models available"]
|
|
@@ -130,8 +150,16 @@ def format_leaderboard_html(data: list) -> str:
|
|
| 130 |
if not data:
|
| 131 |
return "<p>No models evaluated yet. Be the first to submit!</p>"
|
| 132 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 133 |
# Sort by legal_rate
|
| 134 |
-
sorted_data = sorted(
|
| 135 |
|
| 136 |
html = """
|
| 137 |
<style>
|
|
@@ -381,15 +409,17 @@ Please ensure your model was submitted using the official submission script (`su
|
|
| 381 |
which adds the required metadata to the README.md file.
|
| 382 |
"""
|
| 383 |
|
| 384 |
-
# Update leaderboard - only
|
| 385 |
leaderboard = load_leaderboard()
|
| 386 |
-
|
|
|
|
|
|
|
| 387 |
|
| 388 |
new_legal_rate = results.get("legal_rate_with_retry", 0)
|
| 389 |
new_legal_rate_first_try = results.get("legal_rate_first_try", 0)
|
| 390 |
|
| 391 |
-
if
|
| 392 |
-
# New
|
| 393 |
entry = {
|
| 394 |
"model_id": model_id,
|
| 395 |
"user_id": user_id,
|
|
@@ -401,19 +431,23 @@ which adds the required metadata to the README.md file.
|
|
| 401 |
save_leaderboard(leaderboard)
|
| 402 |
update_message = "New entry added to leaderboard!"
|
| 403 |
else:
|
| 404 |
-
# Existing
|
| 405 |
-
old_legal_rate =
|
|
|
|
| 406 |
if new_legal_rate > old_legal_rate:
|
| 407 |
-
|
| 408 |
-
"
|
| 409 |
"legal_rate": new_legal_rate,
|
| 410 |
"legal_rate_first_try": new_legal_rate_first_try,
|
| 411 |
"last_updated": datetime.now().strftime("%Y-%m-%d %H:%M"),
|
| 412 |
})
|
| 413 |
save_leaderboard(leaderboard)
|
| 414 |
-
|
|
|
|
|
|
|
|
|
|
| 415 |
else:
|
| 416 |
-
update_message = f"
|
| 417 |
|
| 418 |
progress(1.0, desc="Done!")
|
| 419 |
|
|
|
|
| 114 |
|
| 115 |
|
| 116 |
def get_available_models() -> list:
    """Fetch chess model ids from the organization, newest first, one per user.

    Model names are expected to look like ``chess-<username>-<modelname>``.
    Only the first (i.e. most recently modified) model seen for each username
    is kept. Models whose name contains "chess" but does not follow that
    pattern are always kept, since no username can be derived for them.

    Returns:
        A list of model ids, or ``["No models available"]`` when no model
        qualifies or when fetching the models fails for any reason.
    """
    try:
        from huggingface_hub import list_models

        # Newest first, so the first model seen for a username is their latest.
        models = list_models(author=ORGANIZATION, sort="lastModified", direction=-1)

        seen_users = set()
        filtered_models = []
        for m in models:
            if "chess" not in m.id.lower():
                continue

            # e.g. "LLM-course/chess-johndoe-mymodel" -> "chess-johndoe-mymodel"
            model_name = m.id.split("/")[-1]
            parts = model_name.split("-")
            # Username is the segment right after the leading "chess-"
            username = parts[1] if len(parts) >= 2 and parts[0] == "chess" else None

            if not username:
                # Pattern doesn't match (no "chess-" prefix or empty username):
                # include the model anyway rather than silently dropping it.
                filtered_models.append(m.id)
            elif username not in seen_users:
                seen_users.add(username)
                filtered_models.append(m.id)

        return filtered_models if filtered_models else ["No models available"]
    except Exception as e:
        # Best-effort lookup: any failure (import, network, API) falls back
        # to the placeholder entry instead of propagating to the caller.
        print(f"Error fetching models: {e}")
        return ["No models available"]
|
|
|
|
| 150 |
if not data:
|
| 151 |
return "<p>No models evaluated yet. Be the first to submit!</p>"
|
| 152 |
|
| 153 |
+
# Keep only the best entry per user
|
| 154 |
+
best_per_user = {}
|
| 155 |
+
for entry in data:
|
| 156 |
+
user_id = entry.get("user_id", "unknown")
|
| 157 |
+
legal_rate = entry.get("legal_rate", 0)
|
| 158 |
+
if user_id not in best_per_user or legal_rate > best_per_user[user_id].get("legal_rate", 0):
|
| 159 |
+
best_per_user[user_id] = entry
|
| 160 |
+
|
| 161 |
# Sort by legal_rate
|
| 162 |
+
sorted_data = sorted(best_per_user.values(), key=lambda x: x.get("legal_rate", 0), reverse=True)
|
| 163 |
|
| 164 |
html = """
|
| 165 |
<style>
|
|
|
|
| 409 |
which adds the required metadata to the README.md file.
|
| 410 |
"""
|
| 411 |
|
| 412 |
+
# Update leaderboard - only one entry per user, keep the best
|
| 413 |
leaderboard = load_leaderboard()
|
| 414 |
+
|
| 415 |
+
# Find existing entry for this user (not model - one entry per user)
|
| 416 |
+
user_entry = next((e for e in leaderboard if e.get("user_id") == user_id), None)
|
| 417 |
|
| 418 |
new_legal_rate = results.get("legal_rate_with_retry", 0)
|
| 419 |
new_legal_rate_first_try = results.get("legal_rate_first_try", 0)
|
| 420 |
|
| 421 |
+
if user_entry is None:
|
| 422 |
+
# New user - add to leaderboard
|
| 423 |
entry = {
|
| 424 |
"model_id": model_id,
|
| 425 |
"user_id": user_id,
|
|
|
|
| 431 |
save_leaderboard(leaderboard)
|
| 432 |
update_message = "New entry added to leaderboard!"
|
| 433 |
else:
|
| 434 |
+
# Existing user - only update if this submission is better
|
| 435 |
+
old_legal_rate = user_entry.get("legal_rate", 0)
|
| 436 |
+
old_model = user_entry.get("model_id", "unknown")
|
| 437 |
if new_legal_rate > old_legal_rate:
|
| 438 |
+
user_entry.update({
|
| 439 |
+
"model_id": model_id, # Update to new model if better
|
| 440 |
"legal_rate": new_legal_rate,
|
| 441 |
"legal_rate_first_try": new_legal_rate_first_try,
|
| 442 |
"last_updated": datetime.now().strftime("%Y-%m-%d %H:%M"),
|
| 443 |
})
|
| 444 |
save_leaderboard(leaderboard)
|
| 445 |
+
if old_model != model_id:
|
| 446 |
+
update_message = f"🎉 Improved! New best model for {user_id}: {old_legal_rate*100:.1f}% → {new_legal_rate*100:.1f}%"
|
| 447 |
+
else:
|
| 448 |
+
update_message = f"🎉 Improved! Previous: {old_legal_rate*100:.1f}% → New: {new_legal_rate*100:.1f}%"
|
| 449 |
else:
|
| 450 |
+
update_message = f"ℹ️ No improvement. Your best: {old_legal_rate*100:.1f}% (model: {old_model.split('/')[-1]}), This run: {new_legal_rate*100:.1f}%"
|
| 451 |
|
| 452 |
progress(1.0, desc="Done!")
|
| 453 |
|