nathanael-fijalkow committed on
Commit
03b46b4
·
1 Parent(s): 90ae99b

another try

Browse files
Files changed (1) hide show
  1. app.py +11 -108
app.py CHANGED
@@ -44,100 +44,6 @@ LEADERBOARD_COLUMNS = [
44
  "last_updated",
45
  ]
46
 
47
-
48
- # =============================================================================
49
- # Webhook Queue and Worker
50
- # =============================================================================
51
-
52
- eval_queue = queue.Queue()
53
- eval_status = {} # Track status of queued evaluations
54
- eval_lock = threading.Lock()
55
-
56
-
57
- def evaluation_worker():
58
- """Background worker that processes evaluation queue."""
59
- while True:
60
- try:
61
- model_id = eval_queue.get()
62
-
63
- with eval_lock:
64
- eval_status[model_id] = "running"
65
-
66
- print(f"[Webhook Worker] Starting evaluation for: {model_id}")
67
-
68
- try:
69
- sys.path.insert(0, str(Path(__file__).parent))
70
- from src.evaluate import (
71
- ChessEvaluator,
72
- load_model_and_tokenizer,
73
- post_discussion_summary,
74
- )
75
-
76
- # Load and evaluate
77
- model, tokenizer, _ = load_model_and_tokenizer(model_id, verbose=True)
78
- evaluator = ChessEvaluator(model=model, tokenizer=tokenizer, model_path=model_id)
79
- result = evaluator.evaluate(verbose=True)
80
-
81
- # Update leaderboard if evaluation succeeded
82
- if result.passed_param_check and result.passed_pychess_check and not result.error_message:
83
- user_id = get_model_submitter(model_id)
84
- if user_id:
85
- leaderboard = load_leaderboard()
86
- user_entry = next((e for e in leaderboard if e.get("user_id") == user_id), None)
87
-
88
- new_entry = {
89
- "model_id": model_id,
90
- "user_id": user_id,
91
- "n_parameters": result.n_parameters,
92
- "legal_rate_first_try": result.legal_rate_first_try,
93
- "legal_rate_with_retry": result.legal_rate_with_retry,
94
- "games_played": result.games_played,
95
- "last_updated": datetime.now().strftime("%Y-%m-%d %H:%M"),
96
- }
97
-
98
- if user_entry is None:
99
- leaderboard.append(new_entry)
100
- save_leaderboard(leaderboard)
101
- print(f"[Webhook Worker] Added {model_id} to leaderboard")
102
- elif result.legal_rate_with_retry > user_entry.get("legal_rate_with_retry", 0):
103
- user_entry.update(new_entry)
104
- save_leaderboard(leaderboard)
105
- print(f"[Webhook Worker] Updated {model_id} on leaderboard (improvement)")
106
- else:
107
- print(f"[Webhook Worker] {model_id} - no improvement, not updating leaderboard")
108
-
109
- # Post results to model discussion
110
- if HF_TOKEN:
111
- try:
112
- post_discussion_summary(model_id, result, HF_TOKEN)
113
- print(f"[Webhook Worker] Posted results to {model_id} discussion")
114
- except Exception as e:
115
- print(f"[Webhook Worker] Failed to post discussion: {e}")
116
- else:
117
- print(f"[Webhook Worker] Could not determine submitter for {model_id}")
118
- else:
119
- print(f"[Webhook Worker] Evaluation failed for {model_id}: {result.error_message}")
120
-
121
- with eval_lock:
122
- eval_status[model_id] = "completed"
123
-
124
- except Exception as e:
125
- print(f"[Webhook Worker] Error evaluating {model_id}: {e}")
126
- with eval_lock:
127
- eval_status[model_id] = f"error: {str(e)}"
128
-
129
- except Exception as e:
130
- print(f"[Webhook Worker] Queue error: {e}")
131
- finally:
132
- eval_queue.task_done()
133
-
134
-
135
- # Start the background worker thread
136
- worker_thread = threading.Thread(target=evaluation_worker, daemon=True)
137
- worker_thread.start()
138
- print("[Webhook] Evaluation worker started")
139
-
140
-
141
  def is_chess_model(model_id: str) -> bool:
142
  """Check if a model ID looks like a chess challenge submission."""
143
  if not model_id.startswith(f"{ORGANIZATION}/"):
@@ -171,9 +77,6 @@ def load_leaderboard() -> list:
171
  )
172
 
173
  df = pd.read_csv(csv_path)
174
- # Map 'legal_rate' column to 'legal_rate_with_retry' if present
175
- if 'legal_rate_with_retry' not in df.columns and 'legal_rate' in df.columns:
176
- df['legal_rate_with_retry'] = df['legal_rate']
177
  return df.to_dict(orient="records")
178
 
179
  except Exception as e:
@@ -283,16 +186,16 @@ def format_leaderboard_html(data: list) -> str:
283
  if not data:
284
  return "<p>No models evaluated yet. Be the first to submit!</p>"
285
 
286
- # Keep only the best entry per user (by legal_rate_with_retry)
287
  best_per_user = {}
288
  for entry in data:
289
  user_id = entry.get("user_id", "unknown")
290
- legal_rate = entry.get("legal_rate_with_retry", 0)
291
- if user_id not in best_per_user or legal_rate > best_per_user[user_id].get("legal_rate_with_retry", 0):
292
  best_per_user[user_id] = entry
293
 
294
- # Sort by legal_rate_with_retry
295
- sorted_data = sorted(best_per_user.values(), key=lambda x: x.get("legal_rate_with_retry", 0), reverse=True)
296
 
297
  html = """
298
  <style>
@@ -339,7 +242,7 @@ def format_leaderboard_html(data: list) -> str:
339
  rank_display = str(i)
340
  model_url = f"https://huggingface.co/{entry['model_id']}"
341
  # Color code legal rate
342
- legal_rate = entry.get('legal_rate_with_retry', 0)
343
  if legal_rate >= 0.9:
344
  legal_class = "legal-good"
345
  elif legal_rate >= 0.7:
@@ -465,7 +368,7 @@ which adds the required metadata to the README.md file.
465
  "user_id": user_id,
466
  "n_parameters": result.n_parameters,
467
  "legal_rate_first_try": result.legal_rate_first_try,
468
- "legal_rate_with_retry": result.legal_rate_with_retry,
469
  "games_played": result.games_played,
470
  "last_updated": datetime.now().strftime("%Y-%m-%d %H:%M"),
471
  }
@@ -475,13 +378,13 @@ which adds the required metadata to the README.md file.
475
  save_leaderboard(leaderboard)
476
  update_message = "New entry added to leaderboard!"
477
  else:
478
- old_rate = user_entry.get("legal_rate_with_retry", 0)
479
- if result.legal_rate_with_retry > old_rate:
480
  user_entry.update(new_entry)
481
  save_leaderboard(leaderboard)
482
- update_message = f"Improved! {old_rate*100:.1f}% -> {result.legal_rate_with_retry*100:.1f}%"
483
  else:
484
- update_message = f"No improvement. Best: {old_rate*100:.1f}%, This run: {result.legal_rate_with_retry*100:.1f}%"
485
 
486
  # Post discussion to model page
487
  if HF_TOKEN:
 
44
  "last_updated",
45
  ]
46
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  def is_chess_model(model_id: str) -> bool:
48
  """Check if a model ID looks like a chess challenge submission."""
49
  if not model_id.startswith(f"{ORGANIZATION}/"):
 
77
  )
78
 
79
  df = pd.read_csv(csv_path)
 
 
 
80
  return df.to_dict(orient="records")
81
 
82
  except Exception as e:
 
186
  if not data:
187
  return "<p>No models evaluated yet. Be the first to submit!</p>"
188
 
189
+ # Keep only the best entry per user (by legal_rate)
190
  best_per_user = {}
191
  for entry in data:
192
  user_id = entry.get("user_id", "unknown")
193
+ legal_rate = entry.get("legal_rate", 0)
194
+ if user_id not in best_per_user or legal_rate > best_per_user[user_id].get("legal_rate", 0):
195
  best_per_user[user_id] = entry
196
 
197
+ # Sort by legal_rate
198
+ sorted_data = sorted(best_per_user.values(), key=lambda x: x.get("legal_rate", 0), reverse=True)
199
 
200
  html = """
201
  <style>
 
242
  rank_display = str(i)
243
  model_url = f"https://huggingface.co/{entry['model_id']}"
244
  # Color code legal rate
245
+ legal_rate = entry.get('legal_rate', 0)
246
  if legal_rate >= 0.9:
247
  legal_class = "legal-good"
248
  elif legal_rate >= 0.7:
 
368
  "user_id": user_id,
369
  "n_parameters": result.n_parameters,
370
  "legal_rate_first_try": result.legal_rate_first_try,
371
+ "legal_rate": result.legal_rate,
372
  "games_played": result.games_played,
373
  "last_updated": datetime.now().strftime("%Y-%m-%d %H:%M"),
374
  }
 
378
  save_leaderboard(leaderboard)
379
  update_message = "New entry added to leaderboard!"
380
  else:
381
+ old_rate = user_entry.get("legal_rate", 0)
382
+ if result.legal_rate > old_rate:
383
  user_entry.update(new_entry)
384
  save_leaderboard(leaderboard)
385
+ update_message = f"Improved! {old_rate*100:.1f}% -> {result.legal_rate*100:.1f}%"
386
  else:
387
+ update_message = f"No improvement. Best: {old_rate*100:.1f}%, This run: {result.legal_rate*100:.1f}%"
388
 
389
  # Post discussion to model page
390
  if HF_TOKEN: