Spaces:
Running
Running
Commit
·
d9a4d3b
1
Parent(s):
c4e2b27
add user_id
Browse files
app.py
CHANGED
|
@@ -46,6 +46,7 @@ STOCKFISH_LEVELS = {
|
|
| 46 |
# CSV columns for the leaderboard
|
| 47 |
LEADERBOARD_COLUMNS = [
|
| 48 |
"model_id",
|
|
|
|
| 49 |
"legal_rate",
|
| 50 |
"legal_rate_first_try",
|
| 51 |
# "elo",
|
|
@@ -171,6 +172,7 @@ def format_leaderboard_html(data: list) -> str:
|
|
| 171 |
<thead>
|
| 172 |
<tr>
|
| 173 |
<th>Rank</th>
|
|
|
|
| 174 |
<th>Model</th>
|
| 175 |
<th>Legal Rate</th>
|
| 176 |
<th>Legal Rate (1st try)</th>
|
|
@@ -199,9 +201,12 @@ def format_leaderboard_html(data: list) -> str:
|
|
| 199 |
legal_class = "legal-bad"
|
| 200 |
|
| 201 |
legal_rate_first_try = entry.get('legal_rate_first_try', 0)
|
|
|
|
|
|
|
| 202 |
html += f"""
|
| 203 |
<tr>
|
| 204 |
<td class="{rank_class}">{rank_display}</td>
|
|
|
|
| 205 |
<td><a href="{model_url}" target="_blank" class="model-link">{entry['model_id'].split('/')[-1]}</a></td>
|
| 206 |
<td class="{legal_class}">{legal_rate*100:.1f}%</td>
|
| 207 |
<td>{legal_rate_first_try*100:.1f}%</td>
|
|
@@ -325,20 +330,43 @@ def evaluate_legal_moves(
|
|
| 325 |
progress(0.2, desc=f"Testing {n_positions} positions...")
|
| 326 |
results = evaluator.evaluate_legal_moves(n_positions=n_positions, verbose=False)
|
| 327 |
|
| 328 |
-
#
|
|
|
|
|
|
|
|
|
|
| 329 |
leaderboard = load_leaderboard()
|
| 330 |
entry = next((e for e in leaderboard if e["model_id"] == model_id), None)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 331 |
if entry is None:
|
| 332 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 333 |
leaderboard.append(entry)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 334 |
|
| 335 |
-
entry.update({
|
| 336 |
-
"legal_rate": results.get("legal_rate_with_retry", 0),
|
| 337 |
-
"legal_rate_first_try": results.get("legal_rate_first_try", 0),
|
| 338 |
-
"last_updated": datetime.now().strftime("%Y-%m-%d %H:%M"),
|
| 339 |
-
})
|
| 340 |
-
|
| 341 |
-
save_leaderboard(leaderboard)
|
| 342 |
progress(1.0, desc="Done!")
|
| 343 |
|
| 344 |
return f"""
|
|
@@ -351,6 +379,9 @@ def evaluate_legal_moves(
|
|
| 351 |
| **Legal (with retries)** | {results['legal_first_try'] + results['legal_with_retry']} ({results['legal_rate_with_retry']*100:.1f}%) |
|
| 352 |
| **Always Illegal** | {results['illegal_all_retries']} ({results['illegal_rate']*100:.1f}%) |
|
| 353 |
|
|
|
|
|
|
|
|
|
|
| 354 |
### Interpretation
|
| 355 |
- **>90% legal rate**: Great! Model has learned chess rules well.
|
| 356 |
- **70-90% legal rate**: Decent, but room for improvement.
|
|
|
|
| 46 |
# CSV columns for the leaderboard
|
| 47 |
LEADERBOARD_COLUMNS = [
|
| 48 |
"model_id",
|
| 49 |
+
"user_id",
|
| 50 |
"legal_rate",
|
| 51 |
"legal_rate_first_try",
|
| 52 |
# "elo",
|
|
|
|
| 172 |
<thead>
|
| 173 |
<tr>
|
| 174 |
<th>Rank</th>
|
| 175 |
+
<th>User</th>
|
| 176 |
<th>Model</th>
|
| 177 |
<th>Legal Rate</th>
|
| 178 |
<th>Legal Rate (1st try)</th>
|
|
|
|
| 201 |
legal_class = "legal-bad"
|
| 202 |
|
| 203 |
legal_rate_first_try = entry.get('legal_rate_first_try', 0)
|
| 204 |
+
user_id = entry.get('user_id', 'unknown')
|
| 205 |
+
user_url = f"https://huggingface.co/{user_id}"
|
| 206 |
html += f"""
|
| 207 |
<tr>
|
| 208 |
<td class="{rank_class}">{rank_display}</td>
|
| 209 |
+
<td><a href="{user_url}" target="_blank" class="model-link">{user_id}</a></td>
|
| 210 |
<td><a href="{model_url}" target="_blank" class="model-link">{entry['model_id'].split('/')[-1]}</a></td>
|
| 211 |
<td class="{legal_class}">{legal_rate*100:.1f}%</td>
|
| 212 |
<td>{legal_rate_first_try*100:.1f}%</td>
|
|
|
|
| 330 |
progress(0.2, desc=f"Testing {n_positions} positions...")
|
| 331 |
results = evaluator.evaluate_legal_moves(n_positions=n_positions, verbose=False)
|
| 332 |
|
| 333 |
+
# Extract user_id from model_id (format: user_id/model_name)
|
| 334 |
+
user_id = model_id.split('/')[0] if '/' in model_id else 'unknown'
|
| 335 |
+
|
| 336 |
+
# Update leaderboard - only if improved
|
| 337 |
leaderboard = load_leaderboard()
|
| 338 |
entry = next((e for e in leaderboard if e["model_id"] == model_id), None)
|
| 339 |
+
|
| 340 |
+
new_legal_rate = results.get("legal_rate_with_retry", 0)
|
| 341 |
+
new_legal_rate_first_try = results.get("legal_rate_first_try", 0)
|
| 342 |
+
|
| 343 |
if entry is None:
|
| 344 |
+
# New model - add to leaderboard
|
| 345 |
+
entry = {
|
| 346 |
+
"model_id": model_id,
|
| 347 |
+
"user_id": user_id,
|
| 348 |
+
"legal_rate": new_legal_rate,
|
| 349 |
+
"legal_rate_first_try": new_legal_rate_first_try,
|
| 350 |
+
"last_updated": datetime.now().strftime("%Y-%m-%d %H:%M"),
|
| 351 |
+
}
|
| 352 |
leaderboard.append(entry)
|
| 353 |
+
save_leaderboard(leaderboard)
|
| 354 |
+
update_message = "New entry added to leaderboard!"
|
| 355 |
+
else:
|
| 356 |
+
# Existing model - only update if improved
|
| 357 |
+
old_legal_rate = entry.get("legal_rate", 0)
|
| 358 |
+
if new_legal_rate > old_legal_rate:
|
| 359 |
+
entry.update({
|
| 360 |
+
"user_id": user_id,
|
| 361 |
+
"legal_rate": new_legal_rate,
|
| 362 |
+
"legal_rate_first_try": new_legal_rate_first_try,
|
| 363 |
+
"last_updated": datetime.now().strftime("%Y-%m-%d %H:%M"),
|
| 364 |
+
})
|
| 365 |
+
save_leaderboard(leaderboard)
|
| 366 |
+
update_message = f"Improved! Previous: {old_legal_rate*100:.1f}% → New: {new_legal_rate*100:.1f}%"
|
| 367 |
+
else:
|
| 368 |
+
update_message = f"ℹ No improvement. Current best: {old_legal_rate*100:.1f}%, This run: {new_legal_rate*100:.1f}%"
|
| 369 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 370 |
progress(1.0, desc="Done!")
|
| 371 |
|
| 372 |
return f"""
|
|
|
|
| 379 |
| **Legal (with retries)** | {results['legal_first_try'] + results['legal_with_retry']} ({results['legal_rate_with_retry']*100:.1f}%) |
|
| 380 |
| **Always Illegal** | {results['illegal_all_retries']} ({results['illegal_rate']*100:.1f}%) |
|
| 381 |
|
| 382 |
+
### Leaderboard Update
|
| 383 |
+
{update_message}
|
| 384 |
+
|
| 385 |
### Interpretation
|
| 386 |
- **>90% legal rate**: Great! Model has learned chess rules well.
|
| 387 |
- **70-90% legal rate**: Decent, but room for improvement.
|