# Gradio app that renders the coding-agent leaderboard, an About tab and a
# citation box.

import os

import gradio as gr
from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
from apscheduler.schedulers.background import BackgroundScheduler
from huggingface_hub import HfApi

from src.leaderboard import get_leaderboard_df, DISPLAY_BY_DEFAULT, SEARCH_COLUMNS
from src.display.text_blocks import (
    TITLE,
    INTRODUCTION_TEXT,
    LLM_BENCHMARKS_TEXT,
    CITATION_BUTTON_LABEL,
    CITATION_BUTTON_TEXT,
)

# Repository id handed to the Hub API when a restart is requested.
REPO_ID = "taagarwa/coding-agent-leaderboard"
# Read from the environment; this is None when HF_TOKEN is not set.
TOKEN = os.environ.get("HF_TOKEN")
API = HfApi(token=TOKEN)


def restart_space():
    """Ask the Hub API to restart the Space identified by ``REPO_ID``."""
    API.restart_space(repo_id=REPO_ID)


# Fetched once, when this module is first executed.
LEADERBOARD_DF = get_leaderboard_df()


def init_leaderboard(dataframe):
    """Build the ``Leaderboard`` component that displays *dataframe*.

    Args:
        dataframe: Table to render. It must expose an ``.empty`` attribute
            (presumably a pandas DataFrame -- confirm against
            ``get_leaderboard_df``).

    Returns:
        A non-interactive ``Leaderboard`` with column selection, search and
        three filters (dataset, parameter count, precision).

    Raises:
        ValueError: If *dataframe* is ``None`` or empty.
    """
    if dataframe is None or dataframe.empty:
        raise ValueError("Leaderboard DataFrame is empty or None.")

    return Leaderboard(
        value=dataframe,
        select_columns=SelectColumns(
            default_selection=DISPLAY_BY_DEFAULT,
            label="Select Columns to Display:",
        ),
        search_columns=SEARCH_COLUMNS,
        # NOTE(review): the filters reference the columns "dataset",
        # "model_num_params" and "precision" by name -- confirm the
        # DataFrame actually carries them.
        filter_columns=[
            ColumnFilter(label="Dataset", column="dataset", type="checkboxgroup"),
            ColumnFilter(
                label="Number of Parameters (B)",
                column="model_num_params",
                type="slider",
                min=0.5,
                max=150,
            ),
            ColumnFilter(label="Precision", column="precision", type="checkboxgroup"),
        ],
        interactive=False,
    )


demo = gr.Blocks()
with demo:
    gr.HTML(TITLE)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("🏅 Coding Agent Benchmark", elem_id="llm-benchmark-tab-table", id=0):
            leaderboard = init_leaderboard(LEADERBOARD_DF)
            # Raw string: "\*" is not a valid Python escape sequence, so a
            # plain literal triggers a SyntaxWarning on Python 3.12+. The
            # backslash itself must reach Markdown so the asterisk is
            # rendered literally.
            gr.Markdown(
                r"\* `internal` refers to internal benchmarks performed by the model provider where the harness/environment were not made public"
            )

        # NOTE(review): this tab reuses the elem_id of the benchmark tab;
        # confirm the duplicate id is intentional.
        with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

    with gr.Row():
        with gr.Accordion("📙 Citation", open=False):
            citation_button = gr.Textbox(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                lines=20,
                elem_id="citation-button",
                show_copy_button=True,
            )
# Schedule restart_space() to run every 1800 seconds (30 minutes) on a
# background scheduler.
scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, trigger="interval", seconds=1800)
scheduler.start()

# Enable the request queue with a default concurrency limit of 40 per event,
# then start serving the app.
demo.queue(default_concurrency_limit=40)
demo.launch()