# Clémentine
# change for model init in relaunch
# 99dd7b3
# raw
# history blame
# 4.89 kB
import gradio as gr
import time
from apscheduler.schedulers.background import BackgroundScheduler
import threading
import globals
from globals import TASKS, LOCAL_CONFIG_FILE
from utils.io import initialize_models_providers_file, save_results, load_results, load_models_providers, get_results_table
from utils.jobs import run_single_job, launch_jobs, update_job_statuses
from typing import List, Optional
def status_monitor(interval_seconds: float = 240) -> None:
    """Poll job statuses in an endless loop; meant to run in a background thread.

    Args:
        interval_seconds: Pause between two consecutive polls. Defaults to
            240 seconds (4 minutes).

    This function never returns.
    """
    while True:
        try:
            update_job_statuses()
        except Exception as exc:
            # An uncaught exception would terminate the monitor thread for
            # good, so report the failure and retry on the next cycle.
            print(f"Job status update failed: {exc!r}")
        time.sleep(interval_seconds)  # Wait before the next poll
def daily_checkpoint() -> None:
    """Announce the daily checkpoint on stdout, then save the results."""
    announcement = "Daily checkpoint - saving current state"
    print(announcement)
    save_results()
# Create Gradio interface
def create_app() -> gr.Blocks:
    """Build the Gradio dashboard and wire up its event handlers.

    The UI contains a log/status textbox, a button that initializes the
    models/providers config file, a button that launches all jobs, a results
    table with a refresh button, and a model/provider selector used to
    relaunch a single job.

    Returns:
        The assembled ``gr.Blocks`` application (not launched yet).
    """
    with gr.Blocks(title="Inference Provider Testing Dashboard") as demo:
        gr.Markdown("# Inference Provider Testing Dashboard")
        gr.Markdown("Launch and monitor evaluation jobs for multiple models and providers.")

        output = gr.Textbox(label="Logs and status", interactive=False)

        with gr.Row():
            with gr.Column():
                gr.Markdown("## Initialize Config File")
                init_btn = gr.Button("Fetch and Initialize Models/Providers", variant="secondary")

        with gr.Row():
            with gr.Column():
                gr.Markdown("## Launch Jobs")
                launch_btn = gr.Button("Launch All Jobs", variant="primary")

        with gr.Row():
            with gr.Column():
                gr.Markdown("## Job Results")
                results_table = gr.Dataframe(
                    headers=["Model", "Provider", "Last Run", "Status", "Current Score", "Previous Score", "Latest Job Id"],
                    value=get_results_table(),
                    interactive=False,
                    wrap=True,
                )
                refresh_btn = gr.Button("Refresh Results")

        with gr.Row():
            with gr.Column():
                gr.Markdown("## Relaunch Individual Job")

                # Both dropdowns start empty; they are filled from the config
                # file by the handlers below.
                relaunch_model = gr.Dropdown(
                    label="Model",
                    choices=[],
                    interactive=True,
                )
                relaunch_provider = gr.Dropdown(
                    label="Provider",
                    choices=[],
                    interactive=True,
                )
                relaunch_btn = gr.Button("Relaunch Job", variant="secondary")

        def update_model_choices() -> gr.update:
            """Reload the config file and list its distinct models, sorted."""
            models_providers = load_models_providers(LOCAL_CONFIG_FILE)
            model_choices = sorted({mp[0] for mp in models_providers})
            return gr.update(choices=model_choices, value=model_choices[0] if model_choices else None)

        def update_provider_choices(model: Optional[str]) -> gr.update:
            """Update provider dropdown based on selected model."""
            if not model:
                # Also clear the selection so a stale provider cannot be
                # submitted together with an empty model.
                return gr.update(choices=[], value=None)

            # Get providers for the selected model from the config file
            models_providers = load_models_providers(LOCAL_CONFIG_FILE)
            providers = [mp[1] for mp in models_providers if mp[0] == model]
            return gr.update(choices=providers, value=providers[0] if providers else None)

        # Event handlers
        # Chain the dropdown refresh after the initialization: two separate
        # click listeners would run concurrently, so the refresh could read
        # the config file before it has been (re)written.
        init_btn.click(
            fn=initialize_models_providers_file,
            outputs=output,
        ).then(
            fn=update_model_choices,
            outputs=relaunch_model,
        )

        launch_btn.click(
            fn=launch_jobs,
            outputs=output,
        )

        refresh_btn.click(
            fn=get_results_table,
            outputs=results_table,
        )

        # Update provider dropdown when model is selected
        relaunch_model.change(
            fn=update_provider_choices,
            inputs=relaunch_model,
            outputs=relaunch_provider,
        )

        relaunch_btn.click(
            fn=run_single_job,
            inputs=[relaunch_model, relaunch_provider],
            outputs=output,
        )

    return demo
if __name__ == "__main__":
    # Bring back previously stored results before anything else starts.
    load_results()
    print("Starting Inference Provider Testing Dashboard")

    # Run the job status monitor on a daemon thread.
    monitor_thread = threading.Thread(target=status_monitor, daemon=True)
    monitor_thread.start()
    print("Job status monitor started")

    # Register the daily checkpoint as a cron job firing at 00:00.
    midnight = {"hour": 0, "minute": 0}
    scheduler = BackgroundScheduler()
    scheduler.add_job(daily_checkpoint, "cron", **midnight)
    scheduler.start()
    print("Daily checkpoint scheduler started (saves at 00:00)")

    # Build the dashboard and serve it on all interfaces, port 7860.
    launch_options = {"server_name": "0.0.0.0", "server_port": 7860}
    demo = create_app()
    demo.launch(**launch_options)