import os
import re
from pathlib import Path
def patch_gradio_leaderboard():
    """Patch gradio_leaderboard JS to fix crash on tab switch with Gradio 5.x.

    Locates the installed ``gradio_leaderboard`` package, and if its
    ``templates/component/Index-CzS_eGV6.js`` bundle exists, applies a fixed
    list of literal search/replace patches to it. The file is rewritten only
    when at least one patch actually changed the text, so calling this again
    on an already-patched bundle leaves the file untouched.

    Returns:
        None. Returns early without touching anything when the bundle file
        is not present.
    """
    import gradio_leaderboard

    pkg_dir = Path(gradio_leaderboard.__file__).parent
    js_file = pkg_dir / "templates" / "component" / "Index-CzS_eGV6.js"
    if not js_file.exists():
        return

    # Decode/encode explicitly as UTF-8 instead of relying on the locale's
    # default encoding, which may be unable to round-trip the bundle.
    original = js_file.read_text(encoding="utf-8")

    patches = [
        # Fix 1 & 2: Guard r[39]/a[39] filter callback (undefined during Svelte outro)
        (
            'r[0].filter(\n /*func*/\n r[39]\n ).map(qd)',
            '(r[39] ? r[0].filter(r[39]) : r[0]).map(qd)',
        ),
        (
            'a[0].filter(\n /*func*/\n a[39]\n ).map(qd))',
            '(a[39] ? a[0].filter(a[39]) : a[0]).map(qd))',
        ),
        # Fix 3: Lx (Boolean) extracted from Rx (globals) which is undefined in Gradio 5
        (
            '{ Boolean: Lx } = Rx,',
            'Lx = (Rx && Rx.Boolean) || Boolean,',
        ),
    ]

    # str.replace is a no-op when the needle is absent, so no membership
    # pre-check is needed; comparing with the original tells us whether any
    # patch matched.
    patched = original
    for old, new in patches:
        patched = patched.replace(old, new)

    if patched != original:
        js_file.write_text(patched, encoding="utf-8")
# Apply the JS patch at import time, before the Gradio UI further down in this
# file is built and launched.
patch_gradio_leaderboard()
import gradio as gr
from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
from apscheduler.schedulers.background import BackgroundScheduler
from huggingface_hub import HfApi
from src.leaderboard import get_leaderboard_df, get_benchmark_run_df
from src.display.text_blocks import (
INTRODUCTION_TEXT,
LLM_BENCHMARKS_TEXT,
)
# Repository id handed to API.restart_space() by restart_space().
REPO_ID = "taagarwa/coding-agent-leaderboard"
# Hub access token taken from the environment; None when HF_TOKEN is unset.
TOKEN = os.getenv("HF_TOKEN")
# Shared Hub client used for the periodic restart.
API = HfApi(token=TOKEN)
def restart_space() -> None:
    """Ask the Hub, via the module-level ``API`` client, to restart ``REPO_ID``.

    Registered below as the periodic scheduler job.
    """
    API.restart_space(repo_id=REPO_ID)
# Load both tables once at import time. LEADERBOARD_DF feeds init_leaderboard();
# BENCHMARK_RUN_DF feeds init_benchmark_runs() and build_header_html().
LEADERBOARD_DF = get_leaderboard_df()
BENCHMARK_RUN_DF = get_benchmark_run_df()
def extract_body(s: str) -> str:
    """Return the text inside a leading ``[...]`` of *s*.

    Intended for cells shaped like ``[label](url)``: the label between the
    first ``[`` (which must be the first character) and the next ``]`` is
    returned.

    Args:
        s: The cell text to inspect.

    Returns:
        The bracketed label, or *s* unchanged when it does not start with a
        ``[...]`` group. Previously that case raised ``AttributeError``
        because ``re.match`` returned ``None``.
    """
    found = re.match(r'\[(.*?)\]', s)
    if found is None:
        # Plain (non-bracketed) cell: use the text itself as its label.
        return s
    return found.group(1)
def build_header_html(df):
    """Build the page header text with summary counts for *df*.

    Args:
        df: Table with "Model", "Harness" and "Benchmark" columns. Harness
            and Benchmark cells are reduced with ``extract_body`` before
            distinct values are counted; Model cells are counted as-is.

    Returns:
        A newline-separated string: title, tagline, then the result, model,
        harness and benchmark counts separated by middle dots.
    """
    n_results = len(df)
    n_models = df["Model"].nunique()
    n_harnesses = df["Harness"].apply(extract_body).nunique()
    n_benchmarks = df["Benchmark"].apply(extract_body).nunique()

    # The separator was stored as mojibake (the UTF-8 bytes of U+00B7 read
    # through a Thai code page). Spell it as an escape so it cannot be
    # re-garbled by a future encoding mix-up.
    separator = "\u00b7"  # middle dot
    counts = [
        f"{n_results} Results",
        f"{n_models} Models",
        f"{n_harnesses} Harnesses",
        f"{n_benchmarks} Benchmarks",
    ]

    # NOTE(review): the template contains no HTML tags, so when rendered as
    # HTML the line breaks collapse into spaces - confirm whether markup was
    # lost from this string.
    lines = [
        "",
        "Coding Agent Leaderboard",
        "Compare coding agents across models and harnesses",
        f"\n{separator}\n".join(counts),
        "",
    ]
    return "\n".join(lines)
def init_leaderboard(dataframe):
    """Create the Leaderboard component for the aggregated results table.

    Every column that is not one of the known metadata columns is treated as
    a benchmark column and shown by default next to the category marker,
    "Harness" and "Model".

    Args:
        dataframe: The table to display.

    Returns:
        A non-interactive ``Leaderboard`` with column selection, search on
        Harness/Model, and category/model/harness/size/precision filters.

    Raises:
        ValueError: If *dataframe* is ``None`` or empty.
    """
    if dataframe is None or dataframe.empty:
        raise ValueError("Leaderboard DataFrame is empty or None.")

    identity_columns = [" ", "Harness", "Model"]
    metadata_columns = identity_columns + [
        "Harness License",
        "Model License",
        "Model Num Params (B)",
        "Precision",
    ]
    score_columns = [name for name in dataframe.columns if name not in metadata_columns]

    def labelled_choices(column):
        # (display label, raw cell) pairs, de-duplicated and sorted by label.
        return sorted({(extract_body(cell), cell) for cell in dataframe[column]})

    category_choices = [("๐ Fully FOSS", "๐ "), ("๐ถ Proprietary", "๐ถ")]
    model_choices = labelled_choices("Model")
    harness_choices = labelled_choices("Harness")

    column_picker = SelectColumns(
        default_selection=identity_columns + score_columns,
        label="Select Columns to Display:",
    )
    filters = [
        ColumnFilter(label="Category", column=" ", type="checkboxgroup", choices=category_choices),
        ColumnFilter(label="Model", column="Model", type="checkboxgroup", choices=model_choices),
        ColumnFilter(label="Harness", column="Harness", type="checkboxgroup", choices=harness_choices),
        ColumnFilter(label="Number of Parameters (B)", column="Model Num Params (B)", type="slider"),
        ColumnFilter(label="Precision", column="Precision", type="checkboxgroup"),
    ]

    return Leaderboard(
        value=dataframe,
        select_columns=column_picker,
        datatype="markdown",
        search_columns=["Harness", "Model"],
        filter_columns=filters,
        interactive=False,
    )
def init_benchmark_runs(dataframe):
    """Create the Leaderboard component for the per-run benchmark table.

    Args:
        dataframe: The table of benchmark runs to display. Its "Benchmark"
            column is read to build the benchmark filter choices.

    Returns:
        A non-interactive ``Leaderboard`` showing category, model, harness,
        benchmark, score and average cost by default, searchable on
        Benchmark/Harness/Model and filterable by category, benchmark,
        parameter count and precision.

    Raises:
        ValueError: If *dataframe* is ``None`` or empty.
    """
    if dataframe is None or dataframe.empty:
        # Name this table explicitly; the message used to be a copy of the
        # one for the aggregated leaderboard table.
        raise ValueError("Benchmark runs DataFrame is empty or None.")

    # Make ColumnFilter choices
    label_choices = [("๐ Fully FOSS", "๐ "), ("๐ถ Proprietary", "๐ถ")]
    # (display label, raw cell) pairs, de-duplicated and sorted by label.
    benchmark_choices = sorted({(extract_body(v), v) for v in dataframe["Benchmark"]})

    return Leaderboard(
        value=dataframe,
        select_columns=SelectColumns(
            default_selection=[
                " ",
                "Model",
                "Harness",
                "Benchmark",
                "Score",
                "Avg Cost Per Task (USD)",
            ],
            label="Select Columns to Display:",
        ),
        datatype="markdown",
        search_columns=[
            "Benchmark",
            "Harness",
            "Model",
        ],
        filter_columns=[
            ColumnFilter(label="Category", column=" ", type="checkboxgroup", choices=label_choices),
            ColumnFilter(label="Benchmark", column="Benchmark", type="checkboxgroup", choices=benchmark_choices),
            ColumnFilter(label="Number of Parameters (B)", column="Model Num Params (B)", type="slider"),
            ColumnFilter(label="Precision", column="Precision", type="checkboxgroup"),
        ],
        interactive=False,
    )
# Assemble the page: header, introduction, then one tab per view.
with gr.Blocks(theme="citrus") as demo:
    gr.HTML(build_header_html(BENCHMARK_RUN_DF))
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.Tabs():
        with gr.Tab("๐ Leaderboard"):
            leaderboard = init_leaderboard(LEADERBOARD_DF)

        with gr.Tab("๐ Benchmark Runs"):
            benchmark_runs = init_benchmark_runs(BENCHMARK_RUN_DF)

        with gr.Tab("๐ About"):
            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

# Call restart_space() every 1800 seconds (30 minutes) from a background
# scheduler.
scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, trigger="interval", seconds=1800)
scheduler.start()

# Enable the request queue, then start serving.
demo.queue(default_concurrency_limit=40)
demo.launch()