nathanael-fijalkow committed on
Commit
d9a4d3b
·
1 Parent(s): c4e2b27

add user_id

Browse files
Files changed (1) hide show
  1. app.py +40 -9
app.py CHANGED
@@ -46,6 +46,7 @@ STOCKFISH_LEVELS = {
46
  # CSV columns for the leaderboard
47
  LEADERBOARD_COLUMNS = [
48
  "model_id",
 
49
  "legal_rate",
50
  "legal_rate_first_try",
51
  # "elo",
@@ -171,6 +172,7 @@ def format_leaderboard_html(data: list) -> str:
171
  <thead>
172
  <tr>
173
  <th>Rank</th>
 
174
  <th>Model</th>
175
  <th>Legal Rate</th>
176
  <th>Legal Rate (1st try)</th>
@@ -199,9 +201,12 @@ def format_leaderboard_html(data: list) -> str:
199
  legal_class = "legal-bad"
200
 
201
  legal_rate_first_try = entry.get('legal_rate_first_try', 0)
 
 
202
  html += f"""
203
  <tr>
204
  <td class="{rank_class}">{rank_display}</td>
 
205
  <td><a href="{model_url}" target="_blank" class="model-link">{entry['model_id'].split('/')[-1]}</a></td>
206
  <td class="{legal_class}">{legal_rate*100:.1f}%</td>
207
  <td>{legal_rate_first_try*100:.1f}%</td>
@@ -325,20 +330,43 @@ def evaluate_legal_moves(
325
  progress(0.2, desc=f"Testing {n_positions} positions...")
326
  results = evaluator.evaluate_legal_moves(n_positions=n_positions, verbose=False)
327
 
328
- # Update leaderboard
 
 
 
329
  leaderboard = load_leaderboard()
330
  entry = next((e for e in leaderboard if e["model_id"] == model_id), None)
 
 
 
 
331
  if entry is None:
332
- entry = {"model_id": model_id}
 
 
 
 
 
 
 
333
  leaderboard.append(entry)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
334
 
335
- entry.update({
336
- "legal_rate": results.get("legal_rate_with_retry", 0),
337
- "legal_rate_first_try": results.get("legal_rate_first_try", 0),
338
- "last_updated": datetime.now().strftime("%Y-%m-%d %H:%M"),
339
- })
340
-
341
- save_leaderboard(leaderboard)
342
  progress(1.0, desc="Done!")
343
 
344
  return f"""
@@ -351,6 +379,9 @@ def evaluate_legal_moves(
351
  | **Legal (with retries)** | {results['legal_first_try'] + results['legal_with_retry']} ({results['legal_rate_with_retry']*100:.1f}%) |
352
  | **Always Illegal** | {results['illegal_all_retries']} ({results['illegal_rate']*100:.1f}%) |
353
 
 
 
 
354
  ### Interpretation
355
  - **>90% legal rate**: Great! Model has learned chess rules well.
356
  - **70-90% legal rate**: Decent, but room for improvement.
 
46
  # CSV columns for the leaderboard
47
  LEADERBOARD_COLUMNS = [
48
  "model_id",
49
+ "user_id",
50
  "legal_rate",
51
  "legal_rate_first_try",
52
  # "elo",
 
172
  <thead>
173
  <tr>
174
  <th>Rank</th>
175
+ <th>User</th>
176
  <th>Model</th>
177
  <th>Legal Rate</th>
178
  <th>Legal Rate (1st try)</th>
 
201
  legal_class = "legal-bad"
202
 
203
  legal_rate_first_try = entry.get('legal_rate_first_try', 0)
204
+ user_id = entry.get('user_id', 'unknown')
205
+ user_url = f"https://huggingface.co/{user_id}"
206
  html += f"""
207
  <tr>
208
  <td class="{rank_class}">{rank_display}</td>
209
+ <td><a href="{user_url}" target="_blank" class="model-link">{user_id}</a></td>
210
  <td><a href="{model_url}" target="_blank" class="model-link">{entry['model_id'].split('/')[-1]}</a></td>
211
  <td class="{legal_class}">{legal_rate*100:.1f}%</td>
212
  <td>{legal_rate_first_try*100:.1f}%</td>
 
330
  progress(0.2, desc=f"Testing {n_positions} positions...")
331
  results = evaluator.evaluate_legal_moves(n_positions=n_positions, verbose=False)
332
 
333
+ # Extract user_id from model_id (format: user_id/model_name)
334
+ user_id = model_id.split('/')[0] if '/' in model_id else 'unknown'
335
+
336
+ # Update leaderboard - only if improved
337
  leaderboard = load_leaderboard()
338
  entry = next((e for e in leaderboard if e["model_id"] == model_id), None)
339
+
340
+ new_legal_rate = results.get("legal_rate_with_retry", 0)
341
+ new_legal_rate_first_try = results.get("legal_rate_first_try", 0)
342
+
343
  if entry is None:
344
+ # New model - add to leaderboard
345
+ entry = {
346
+ "model_id": model_id,
347
+ "user_id": user_id,
348
+ "legal_rate": new_legal_rate,
349
+ "legal_rate_first_try": new_legal_rate_first_try,
350
+ "last_updated": datetime.now().strftime("%Y-%m-%d %H:%M"),
351
+ }
352
  leaderboard.append(entry)
353
+ save_leaderboard(leaderboard)
354
+ update_message = "New entry added to leaderboard!"
355
+ else:
356
+ # Existing model - only update if improved
357
+ old_legal_rate = entry.get("legal_rate", 0)
358
+ if new_legal_rate > old_legal_rate:
359
+ entry.update({
360
+ "user_id": user_id,
361
+ "legal_rate": new_legal_rate,
362
+ "legal_rate_first_try": new_legal_rate_first_try,
363
+ "last_updated": datetime.now().strftime("%Y-%m-%d %H:%M"),
364
+ })
365
+ save_leaderboard(leaderboard)
366
+ update_message = f"Improved! Previous: {old_legal_rate*100:.1f}% → New: {new_legal_rate*100:.1f}%"
367
+ else:
368
+ update_message = f"ℹNo improvement. Current best: {old_legal_rate*100:.1f}%, This run: {new_legal_rate*100:.1f}%"
369
 
 
 
 
 
 
 
 
370
  progress(1.0, desc="Done!")
371
 
372
  return f"""
 
379
  | **Legal (with retries)** | {results['legal_first_try'] + results['legal_with_retry']} ({results['legal_rate_with_retry']*100:.1f}%) |
380
  | **Always Illegal** | {results['illegal_all_retries']} ({results['illegal_rate']*100:.1f}%) |
381
 
382
+ ### Leaderboard Update
383
+ {update_message}
384
+
385
  ### Interpretation
386
  - **>90% legal rate**: Great! Model has learned chess rules well.
387
  - **70-90% legal rate**: Decent, but room for improvement.