Lev Israel
committed on
Commit
·
2575879
1
Parent(s):
896528d
Background jobs and polling
Browse files
app.py
CHANGED
|
@@ -210,7 +210,8 @@ def start_evaluation(
|
|
| 210 |
f"β {error}",
|
| 211 |
f"β Invalid model ID: {error}",
|
| 212 |
format_leaderboard_df(),
|
| 213 |
-
gr.update(
|
|
|
|
| 214 |
)
|
| 215 |
else:
|
| 216 |
model_id = model_choice
|
|
@@ -227,7 +228,8 @@ def start_evaluation(
|
|
| 227 |
"β API key required",
|
| 228 |
f"β API key required for {model_id}. Please enter your {key_type.upper()} API key or set the {env_var} environment variable.",
|
| 229 |
format_leaderboard_df(),
|
| 230 |
-
gr.update(
|
|
|
|
| 231 |
)
|
| 232 |
|
| 233 |
# Get model display name
|
|
@@ -248,27 +250,28 @@ def start_evaluation(
|
|
| 248 |
|
| 249 |
return (
|
| 250 |
job.job_id,
|
| 251 |
-
"β³
|
| 252 |
"",
|
| 253 |
gr.update(), # Don't update leaderboard yet
|
| 254 |
-
gr.update(
|
|
|
|
| 255 |
)
|
| 256 |
|
| 257 |
|
| 258 |
-
def
|
| 259 |
"""
|
| 260 |
-
|
| 261 |
|
| 262 |
-
This is a stateless operation - each
|
| 263 |
-
|
| 264 |
-
timed out or the page was refreshed.
|
| 265 |
"""
|
| 266 |
if not job_id:
|
| 267 |
return (
|
| 268 |
"",
|
| 269 |
"",
|
| 270 |
gr.update(),
|
| 271 |
-
gr.update(
|
|
|
|
| 272 |
)
|
| 273 |
|
| 274 |
job = get_job(job_id)
|
|
@@ -276,10 +279,11 @@ def poll_evaluation(job_id: str):
|
|
| 276 |
if job is None:
|
| 277 |
# Job not found - might have been cleaned up or never existed
|
| 278 |
return (
|
| 279 |
-
"Job not found. It may have expired or been cleaned up.",
|
| 280 |
"",
|
| 281 |
gr.update(),
|
| 282 |
-
gr.update(
|
|
|
|
| 283 |
)
|
| 284 |
|
| 285 |
if job.status == "completed":
|
|
@@ -291,7 +295,8 @@ def poll_evaluation(job_id: str):
|
|
| 291 |
job.progress,
|
| 292 |
job.result or "",
|
| 293 |
format_leaderboard_df(),
|
| 294 |
-
gr.update(
|
|
|
|
| 295 |
)
|
| 296 |
|
| 297 |
elif job.status == "failed":
|
|
@@ -304,7 +309,8 @@ def poll_evaluation(job_id: str):
|
|
| 304 |
job.progress,
|
| 305 |
f"β Error: {error_msg}",
|
| 306 |
format_leaderboard_df(),
|
| 307 |
-
gr.update(
|
|
|
|
| 308 |
)
|
| 309 |
|
| 310 |
else:
|
|
@@ -317,7 +323,8 @@ def poll_evaluation(job_id: str):
|
|
| 317 |
progress_with_pct,
|
| 318 |
"",
|
| 319 |
gr.update(), # Don't update leaderboard yet
|
| 320 |
-
gr.update(
|
|
|
|
| 321 |
)
|
| 322 |
|
| 323 |
|
|
@@ -524,13 +531,27 @@ def create_app():
|
|
| 524 |
variant="primary",
|
| 525 |
size="lg",
|
| 526 |
)
|
| 527 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 528 |
status_text = gr.Markdown("")
|
| 529 |
-
|
| 530 |
results_markdown = gr.Markdown("")
|
| 531 |
-
|
| 532 |
-
# Timer for polling (initially inactive)
|
| 533 |
-
poll_timer = gr.Timer(value=2, active=False)
|
| 534 |
|
| 535 |
gr.Markdown("""
|
| 536 |
---
|
|
@@ -548,21 +569,45 @@ def create_app():
|
|
| 548 |
inputs=[model_dropdown],
|
| 549 |
outputs=[custom_model_input, api_key_input],
|
| 550 |
)
|
| 551 |
-
|
| 552 |
# Start evaluation: creates persistent job file and spawns background thread.
|
| 553 |
# Returns immediately with job_id so UI doesn't timeout waiting.
|
| 554 |
evaluate_btn.click(
|
| 555 |
fn=start_evaluation,
|
| 556 |
inputs=[model_dropdown, custom_model_input, api_key_input, max_pairs_slider],
|
| 557 |
-
outputs=[job_id_state, status_text, results_markdown, leaderboard_table,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 558 |
)
|
| 559 |
|
| 560 |
-
#
|
| 561 |
-
|
| 562 |
-
|
| 563 |
-
fn=poll_evaluation,
|
| 564 |
inputs=[job_id_state],
|
| 565 |
-
outputs=[status_text, results_markdown, leaderboard_table,
|
| 566 |
)
|
| 567 |
|
| 568 |
def refresh_leaderboard():
|
|
|
|
| 210 |
f"β {error}",
|
| 211 |
f"β Invalid model ID: {error}",
|
| 212 |
format_leaderboard_df(),
|
| 213 |
+
gr.update(visible=False), # check_status_btn
|
| 214 |
+
gr.update(visible=False, value=""), # job_id_display
|
| 215 |
)
|
| 216 |
else:
|
| 217 |
model_id = model_choice
|
|
|
|
| 228 |
"β API key required",
|
| 229 |
f"β API key required for {model_id}. Please enter your {key_type.upper()} API key or set the {env_var} environment variable.",
|
| 230 |
format_leaderboard_df(),
|
| 231 |
+
gr.update(visible=False),
|
| 232 |
+
gr.update(visible=False, value=""),
|
| 233 |
)
|
| 234 |
|
| 235 |
# Get model display name
|
|
|
|
| 250 |
|
| 251 |
return (
|
| 252 |
job.job_id,
|
| 253 |
+
"⏳ Evaluation started! Click 'Check Status' to see progress (auto-refreshes every 5 seconds).",
|
| 254 |
"",
|
| 255 |
gr.update(), # Don't update leaderboard yet
|
| 256 |
+
gr.update(visible=True), # Show check_status_btn
|
| 257 |
+
gr.update(visible=True, value=f"Job ID: {job.job_id[:8]}..."), # Show job_id_display
|
| 258 |
)
|
| 259 |
|
| 260 |
|
| 261 |
+
def check_job_status(job_id: str):
|
| 262 |
"""
|
| 263 |
+
Check job status by reading the job file.
|
| 264 |
|
| 265 |
+
This is a stateless operation - each check reads fresh data from disk.
|
| 266 |
+
Uses regular HTTP POST (not SSE) so it survives HF Spaces proxy timeouts.
|
|
|
|
| 267 |
"""
|
| 268 |
if not job_id:
|
| 269 |
return (
|
| 270 |
"",
|
| 271 |
"",
|
| 272 |
gr.update(),
|
| 273 |
+
gr.update(visible=False), # Hide check button
|
| 274 |
+
gr.update(visible=False, value=""), # Hide job ID
|
| 275 |
)
|
| 276 |
|
| 277 |
job = get_job(job_id)
|
|
|
|
| 279 |
if job is None:
|
| 280 |
# Job not found - might have been cleaned up or never existed
|
| 281 |
return (
|
| 282 |
+
"⚠️ Job not found. It may have expired or been cleaned up.",
|
| 283 |
"",
|
| 284 |
gr.update(),
|
| 285 |
+
gr.update(visible=False),
|
| 286 |
+
gr.update(visible=False, value=""),
|
| 287 |
)
|
| 288 |
|
| 289 |
if job.status == "completed":
|
|
|
|
| 295 |
job.progress,
|
| 296 |
job.result or "",
|
| 297 |
format_leaderboard_df(),
|
| 298 |
+
gr.update(visible=False), # Hide check button
|
| 299 |
+
gr.update(visible=False, value=""), # Hide job ID
|
| 300 |
)
|
| 301 |
|
| 302 |
elif job.status == "failed":
|
|
|
|
| 309 |
job.progress,
|
| 310 |
f"β Error: {error_msg}",
|
| 311 |
format_leaderboard_df(),
|
| 312 |
+
gr.update(visible=False), # Hide check button
|
| 313 |
+
gr.update(visible=False, value=""), # Hide job ID
|
| 314 |
)
|
| 315 |
|
| 316 |
else:
|
|
|
|
| 323 |
progress_with_pct,
|
| 324 |
"",
|
| 325 |
gr.update(), # Don't update leaderboard yet
|
| 326 |
+
gr.update(visible=True), # Keep check button visible
|
| 327 |
+
gr.update(visible=True), # Keep job ID visible
|
| 328 |
)
|
| 329 |
|
| 330 |
|
|
|
|
| 531 |
variant="primary",
|
| 532 |
size="lg",
|
| 533 |
)
|
| 534 |
+
|
| 535 |
+
# Manual refresh button - visible when a job is running
|
| 536 |
+
# This uses regular HTTP POST (not SSE) so it survives proxy timeouts
|
| 537 |
+
with gr.Row():
|
| 538 |
+
check_status_btn = gr.Button(
|
| 539 |
+
"π Check Status",
|
| 540 |
+
variant="secondary",
|
| 541 |
+
size="sm",
|
| 542 |
+
visible=False,
|
| 543 |
+
)
|
| 544 |
+
job_id_display = gr.Textbox(
|
| 545 |
+
label="",
|
| 546 |
+
visible=False,
|
| 547 |
+
interactive=False,
|
| 548 |
+
container=False,
|
| 549 |
+
scale=2,
|
| 550 |
+
)
|
| 551 |
+
|
| 552 |
status_text = gr.Markdown("")
|
| 553 |
+
|
| 554 |
results_markdown = gr.Markdown("")
|
|
|
|
|
|
|
|
|
|
| 555 |
|
| 556 |
gr.Markdown("""
|
| 557 |
---
|
|
|
|
| 569 |
inputs=[model_dropdown],
|
| 570 |
outputs=[custom_model_input, api_key_input],
|
| 571 |
)
|
| 572 |
+
|
| 573 |
# Start evaluation: creates persistent job file and spawns background thread.
|
| 574 |
# Returns immediately with job_id so UI doesn't timeout waiting.
|
| 575 |
evaluate_btn.click(
|
| 576 |
fn=start_evaluation,
|
| 577 |
inputs=[model_dropdown, custom_model_input, api_key_input, max_pairs_slider],
|
| 578 |
+
outputs=[job_id_state, status_text, results_markdown, leaderboard_table, check_status_btn, job_id_display],
|
| 579 |
+
).then(
|
| 580 |
+
# Start JavaScript auto-polling after evaluation begins
|
| 581 |
+
fn=None,
|
| 582 |
+
inputs=None,
|
| 583 |
+
outputs=None,
|
| 584 |
+
js="""
|
| 585 |
+
() => {
|
| 586 |
+
// Clear any existing interval
|
| 587 |
+
if (window.jobPollInterval) {
|
| 588 |
+
clearInterval(window.jobPollInterval);
|
| 589 |
+
}
|
| 590 |
+
// Auto-click the check status button every 5 seconds
|
| 591 |
+
window.jobPollInterval = setInterval(() => {
|
| 592 |
+
const checkBtn = document.querySelector('button:has(span:contains("Check Status"))') ||
|
| 593 |
+
Array.from(document.querySelectorAll('button')).find(b => b.textContent.includes('Check Status'));
|
| 594 |
+
if (checkBtn && checkBtn.offsetParent !== null) {
|
| 595 |
+
checkBtn.click();
|
| 596 |
+
} else {
|
| 597 |
+
// Button is hidden (job done), stop polling
|
| 598 |
+
clearInterval(window.jobPollInterval);
|
| 599 |
+
window.jobPollInterval = null;
|
| 600 |
+
}
|
| 601 |
+
}, 5000);
|
| 602 |
+
}
|
| 603 |
+
""",
|
| 604 |
)
|
| 605 |
|
| 606 |
+
# Check status button - uses regular HTTP POST (not SSE) so it survives proxy timeouts
|
| 607 |
+
check_status_btn.click(
|
| 608 |
+
fn=check_job_status,
|
|
|
|
| 609 |
inputs=[job_id_state],
|
| 610 |
+
outputs=[status_text, results_markdown, leaderboard_table, check_status_btn, job_id_display],
|
| 611 |
)
|
| 612 |
|
| 613 |
def refresh_leaderboard():
|