Lev Israel committed on
Commit
2575879
·
1 Parent(s): 896528d

Background jobs and polling

Browse files
Files changed (1) hide show
  1. app.py +72 -27
app.py CHANGED
@@ -210,7 +210,8 @@ def start_evaluation(
210
  f"❌ {error}",
211
  f"❌ Invalid model ID: {error}",
212
  format_leaderboard_df(),
213
- gr.update(active=False), # timer
 
214
  )
215
  else:
216
  model_id = model_choice
@@ -227,7 +228,8 @@ def start_evaluation(
227
  "❌ API key required",
228
  f"❌ API key required for {model_id}. Please enter your {key_type.upper()} API key or set the {env_var} environment variable.",
229
  format_leaderboard_df(),
230
- gr.update(active=False),
 
231
  )
232
 
233
  # Get model display name
@@ -248,27 +250,28 @@ def start_evaluation(
248
 
249
  return (
250
  job.job_id,
251
- "⏳ Starting evaluation...",
252
  "",
253
  gr.update(), # Don't update leaderboard yet
254
- gr.update(active=True), # Activate timer
 
255
  )
256
 
257
 
258
- def poll_evaluation(job_id: str):
259
  """
260
- Poll for evaluation status by reading the job file.
261
 
262
- This is a stateless operation - each poll reads fresh data from disk.
263
- This allows the UI to track progress even if the original HTTP connection
264
- timed out or the page was refreshed.
265
  """
266
  if not job_id:
267
  return (
268
  "",
269
  "",
270
  gr.update(),
271
- gr.update(active=False),
 
272
  )
273
 
274
  job = get_job(job_id)
@@ -276,10 +279,11 @@ def poll_evaluation(job_id: str):
276
  if job is None:
277
  # Job not found - might have been cleaned up or never existed
278
  return (
279
- "Job not found. It may have expired or been cleaned up.",
280
  "",
281
  gr.update(),
282
- gr.update(active=False),
 
283
  )
284
 
285
  if job.status == "completed":
@@ -291,7 +295,8 @@ def poll_evaluation(job_id: str):
291
  job.progress,
292
  job.result or "",
293
  format_leaderboard_df(),
294
- gr.update(active=False), # Stop timer
 
295
  )
296
 
297
  elif job.status == "failed":
@@ -304,7 +309,8 @@ def poll_evaluation(job_id: str):
304
  job.progress,
305
  f"❌ Error: {error_msg}",
306
  format_leaderboard_df(),
307
- gr.update(active=False), # Stop timer
 
308
  )
309
 
310
  else:
@@ -317,7 +323,8 @@ def poll_evaluation(job_id: str):
317
  progress_with_pct,
318
  "",
319
  gr.update(), # Don't update leaderboard yet
320
- gr.update(active=True), # Keep timer running
 
321
  )
322
 
323
 
@@ -524,13 +531,27 @@ def create_app():
524
  variant="primary",
525
  size="lg",
526
  )
527
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
528
  status_text = gr.Markdown("")
529
-
530
  results_markdown = gr.Markdown("")
531
-
532
- # Timer for polling (initially inactive)
533
- poll_timer = gr.Timer(value=2, active=False)
534
 
535
  gr.Markdown("""
536
  ---
@@ -548,21 +569,45 @@ def create_app():
548
  inputs=[model_dropdown],
549
  outputs=[custom_model_input, api_key_input],
550
  )
551
-
552
  # Start evaluation: creates persistent job file and spawns background thread.
553
  # Returns immediately with job_id so UI doesn't timeout waiting.
554
  evaluate_btn.click(
555
  fn=start_evaluation,
556
  inputs=[model_dropdown, custom_model_input, api_key_input, max_pairs_slider],
557
- outputs=[job_id_state, status_text, results_markdown, leaderboard_table, poll_timer],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
558
  )
559
 
560
- # Poll for updates by reading job file (stateless - survives connection timeouts).
561
- # Each poll is a fresh HTTP request that reads the job status from disk.
562
- poll_timer.tick(
563
- fn=poll_evaluation,
564
  inputs=[job_id_state],
565
- outputs=[status_text, results_markdown, leaderboard_table, poll_timer],
566
  )
567
 
568
  def refresh_leaderboard():
 
210
  f"❌ {error}",
211
  f"❌ Invalid model ID: {error}",
212
  format_leaderboard_df(),
213
+ gr.update(visible=False), # check_status_btn
214
+ gr.update(visible=False, value=""), # job_id_display
215
  )
216
  else:
217
  model_id = model_choice
 
228
  "❌ API key required",
229
  f"❌ API key required for {model_id}. Please enter your {key_type.upper()} API key or set the {env_var} environment variable.",
230
  format_leaderboard_df(),
231
+ gr.update(visible=False),
232
+ gr.update(visible=False, value=""),
233
  )
234
 
235
  # Get model display name
 
250
 
251
  return (
252
  job.job_id,
253
+ "⏳ Evaluation started! Click 'Check Status' to see progress (auto-refreshes every 5 seconds).",
254
  "",
255
  gr.update(), # Don't update leaderboard yet
256
+ gr.update(visible=True), # Show check_status_btn
257
+ gr.update(visible=True, value=f"Job ID: {job.job_id[:8]}..."), # Show job_id_display
258
  )
259
 
260
 
261
+ def check_job_status(job_id: str):
262
  """
263
+ Check job status by reading the job file.
264
 
265
+ This is a stateless operation - each check reads fresh data from disk.
266
+ Uses regular HTTP POST (not SSE) so it survives HF Spaces proxy timeouts.
 
267
  """
268
  if not job_id:
269
  return (
270
  "",
271
  "",
272
  gr.update(),
273
+ gr.update(visible=False), # Hide check button
274
+ gr.update(visible=False, value=""), # Hide job ID
275
  )
276
 
277
  job = get_job(job_id)
 
279
  if job is None:
280
  # Job not found - might have been cleaned up or never existed
281
  return (
282
+ "⚠️ Job not found. It may have expired or been cleaned up.",
283
  "",
284
  gr.update(),
285
+ gr.update(visible=False),
286
+ gr.update(visible=False, value=""),
287
  )
288
 
289
  if job.status == "completed":
 
295
  job.progress,
296
  job.result or "",
297
  format_leaderboard_df(),
298
+ gr.update(visible=False), # Hide check button
299
+ gr.update(visible=False, value=""), # Hide job ID
300
  )
301
 
302
  elif job.status == "failed":
 
309
  job.progress,
310
  f"❌ Error: {error_msg}",
311
  format_leaderboard_df(),
312
+ gr.update(visible=False), # Hide check button
313
+ gr.update(visible=False, value=""), # Hide job ID
314
  )
315
 
316
  else:
 
323
  progress_with_pct,
324
  "",
325
  gr.update(), # Don't update leaderboard yet
326
+ gr.update(visible=True), # Keep check button visible
327
+ gr.update(visible=True), # Keep job ID visible
328
  )
329
 
330
 
 
531
  variant="primary",
532
  size="lg",
533
  )
534
+
535
+ # Manual refresh button - visible when a job is running
536
+ # This uses regular HTTP POST (not SSE) so it survives proxy timeouts
537
+ with gr.Row():
538
+ check_status_btn = gr.Button(
539
+ "🔄 Check Status",
540
+ variant="secondary",
541
+ size="sm",
542
+ visible=False,
543
+ )
544
+ job_id_display = gr.Textbox(
545
+ label="",
546
+ visible=False,
547
+ interactive=False,
548
+ container=False,
549
+ scale=2,
550
+ )
551
+
552
  status_text = gr.Markdown("")
553
+
554
  results_markdown = gr.Markdown("")
 
 
 
555
 
556
  gr.Markdown("""
557
  ---
 
569
  inputs=[model_dropdown],
570
  outputs=[custom_model_input, api_key_input],
571
  )
572
+
573
  # Start evaluation: creates persistent job file and spawns background thread.
574
  # Returns immediately with job_id so UI doesn't timeout waiting.
575
  evaluate_btn.click(
576
  fn=start_evaluation,
577
  inputs=[model_dropdown, custom_model_input, api_key_input, max_pairs_slider],
578
+ outputs=[job_id_state, status_text, results_markdown, leaderboard_table, check_status_btn, job_id_display],
579
+ ).then(
580
+ # Start JavaScript auto-polling after evaluation begins
581
+ fn=None,
582
+ inputs=None,
583
+ outputs=None,
584
+ js="""
585
+ () => {
586
+ // Clear any existing interval
587
+ if (window.jobPollInterval) {
588
+ clearInterval(window.jobPollInterval);
589
+ }
590
+ // Auto-click the check status button every 5 seconds
591
+ window.jobPollInterval = setInterval(() => {
592
+ const checkBtn = document.querySelector('button:has(span:contains("Check Status"))') ||
593
+ Array.from(document.querySelectorAll('button')).find(b => b.textContent.includes('Check Status'));
594
+ if (checkBtn && checkBtn.offsetParent !== null) {
595
+ checkBtn.click();
596
+ } else {
597
+ // Button is hidden (job done), stop polling
598
+ clearInterval(window.jobPollInterval);
599
+ window.jobPollInterval = null;
600
+ }
601
+ }, 5000);
602
+ }
603
+ """,
604
  )
605
 
606
+ # Check status button - uses regular HTTP POST (not SSE) so it survives proxy timeouts
607
+ check_status_btn.click(
608
+ fn=check_job_status,
 
609
  inputs=[job_id_state],
610
+ outputs=[status_text, results_markdown, leaderboard_table, check_status_btn, job_id_display],
611
  )
612
 
613
  def refresh_leaderboard():