import os
import re
from pathlib import Path


def patch_gradio_leaderboard():
    """Patch gradio_leaderboard JS to fix crash on tab switch with Gradio 5.x.

    Locates the installed ``gradio_leaderboard`` package, reads one specific
    bundled JS file and applies a fixed list of literal search/replace
    patches to it. The file is rewritten only when at least one patch
    matched. If the JS file does not exist the function returns without
    doing anything.

    Matching is an exact substring test (whitespace included), so a bundle
    that does not contain the expected snippets is left untouched.
    """
    import gradio_leaderboard

    pkg_dir = Path(gradio_leaderboard.__file__).parent
    js_file = pkg_dir / "templates" / "component" / "Index-CzS_eGV6.js"
    if not js_file.exists():
        return
    # Be explicit about the encoding so the result does not depend on the
    # process locale.
    src = js_file.read_text(encoding="utf-8")
    patches = [
        # Fix 1 & 2: Guard r[39]/a[39] filter callback (undefined during Svelte outro)
        (
            'r[0].filter(\n /*func*/\n r[39]\n ).map(qd)',
            '(r[39] ? r[0].filter(r[39]) : r[0]).map(qd)',
        ),
        (
            'a[0].filter(\n /*func*/\n a[39]\n ).map(qd))',
            '(a[39] ? a[0].filter(a[39]) : a[0]).map(qd))',
        ),
        # Fix 3: Lx (Boolean) extracted from Rx (globals) which is undefined in Gradio 5
        (
            '{ Boolean: Lx } = Rx,',
            'Lx = (Rx && Rx.Boolean) || Boolean,',
        ),
    ]
    patched = False
    for old, new in patches:
        if old in src:
            src = src.replace(old, new)
            patched = True
    # Only touch the file on disk when something actually changed.
    if patched:
        js_file.write_text(src, encoding="utf-8")


# Apply the patch before the UI libraries below are imported and used.
patch_gradio_leaderboard()

import gradio as gr
from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
from apscheduler.schedulers.background import BackgroundScheduler
from huggingface_hub import HfApi

from src.leaderboard import get_leaderboard_df, get_benchmark_run_df
from src.display.text_blocks import (
    INTRODUCTION_TEXT,
    LLM_BENCHMARKS_TEXT,
)

REPO_ID = "taagarwa/coding-agent-leaderboard"
TOKEN = os.environ.get("HF_TOKEN")
API = HfApi(token=TOKEN)


def restart_space():
    """Ask the Hub API client to restart the Space identified by REPO_ID."""
    API.restart_space(repo_id=REPO_ID)


LEADERBOARD_DF = get_leaderboard_df()
BENCHMARK_RUN_DF = get_benchmark_run_df()


def extract_body(s: str) -> str:
    """Return the text inside a leading ``[...]`` of *s*.

    The pattern is anchored at the start of the string and is non-greedy, so
    ``"[name](url)"`` yields ``"name"``. When *s* does not start with a
    bracketed section, *s* itself is returned unchanged instead of raising.
    """
    match = re.match(r'\[(.*?)\]', s)
    return match.group(1) if match else s


def build_header_html(df):
    """Build the header text summarising the contents of *df*.

    Reports the number of rows and the number of distinct values in the
    ``Model``, ``Harness`` and ``Benchmark`` columns. Harness and benchmark
    values are reduced with :func:`extract_body` before counting; model
    values are counted as-is.
    """
    n_results = len(df)
    n_models = df["Model"].nunique()
    n_harnesses = df["Harness"].apply(extract_body).nunique()
    n_benchmarks = df["Benchmark"].apply(extract_body).nunique()
    return f"""

Coding Agent Leaderboard

Compare coding agents across models and harnesses

{n_results} Results ยท {n_models} Models ยท {n_harnesses} Harnesses ยท {n_benchmarks} Benchmarks
"""


def init_leaderboard(dataframe):
    """Create the main ``Leaderboard`` component for *dataframe*.

    Every column that is not one of the known metadata columns is treated as
    a benchmark column and shown by default, after the category, harness and
    model columns.

    Raises:
        ValueError: if *dataframe* is ``None`` or empty.
    """
    if dataframe is None or dataframe.empty:
        raise ValueError("Leaderboard DataFrame is empty or None.")
    label_choices = [("๐ŸŸ  Fully FOSS", "๐ŸŸ "), ("๐Ÿ”ถ Proprietary", "๐Ÿ”ถ")]
    meta_columns = [
        " ",
        "Harness",
        "Model",
        "Harness License",
        "Model License",
        "Model Num Params (B)",
        "Precision",
    ]
    benchmark_columns = [col for col in dataframe.columns if col not in meta_columns]
    # Filter choices are (extracted text, raw cell value) pairs, de-duplicated
    # through a set and sorted for a stable order.
    model_choices = sorted({(extract_body(v), v) for v in dataframe["Model"]})
    harness_choices = sorted({(extract_body(v), v) for v in dataframe["Harness"]})
    default_columns = [" ", "Harness", "Model"] + benchmark_columns
    return Leaderboard(
        value=dataframe,
        select_columns=SelectColumns(
            default_selection=default_columns,
            label="Select Columns to Display:",
        ),
        datatype="markdown",
        search_columns=["Harness", "Model"],
        filter_columns=[
            ColumnFilter(label="Category", column=" ", type="checkboxgroup", choices=label_choices),
            ColumnFilter(label="Model", column="Model", type="checkboxgroup", choices=model_choices),
            ColumnFilter(label="Harness", column="Harness", type="checkboxgroup", choices=harness_choices),
            ColumnFilter(label="Number of Parameters (B)", column="Model Num Params (B)", type="slider"),
            ColumnFilter(label="Precision", column="Precision", type="checkboxgroup"),
        ],
        interactive=False,
    )


def init_benchmark_runs(dataframe):
    """Create the per-run ``Leaderboard`` component for *dataframe*.

    Raises:
        ValueError: if *dataframe* is ``None`` or empty.
    """
    if dataframe is None or dataframe.empty:
        raise ValueError("Leaderboard DataFrame is empty or None.")
    # Make ColumnFilter choices
    label_choices = [("๐ŸŸ  Fully FOSS", "๐ŸŸ "), ("๐Ÿ”ถ Proprietary", "๐Ÿ”ถ")]
    benchmark_choices = sorted({(extract_body(v), v) for v in dataframe["Benchmark"]})
    return Leaderboard(
        value=dataframe,
        select_columns=SelectColumns(
            default_selection=[
                " ",
                "Model",
                "Harness",
                "Benchmark",
                "Score",
                "Avg Cost Per Task (USD)",
            ],
            label="Select Columns to Display:",
        ),
        datatype="markdown",
        search_columns=[
            "Benchmark",
            "Harness",
            "Model",
        ],
        filter_columns=[
            ColumnFilter(label="Category", column=" ", type="checkboxgroup", choices=label_choices),
            ColumnFilter(label="Benchmark", column="Benchmark", type="checkboxgroup", choices=benchmark_choices),
            ColumnFilter(label="Number of Parameters (B)", column="Model Num Params (B)", type="slider"),
            ColumnFilter(label="Precision", column="Precision", type="checkboxgroup"),
        ],
        interactive=False,
    )


demo = gr.Blocks(theme="citrus")
with demo:
    # The header counts are computed from the per-run table.
    gr.HTML(build_header_html(BENCHMARK_RUN_DF))
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
    with gr.Tabs():
        with gr.Tab("๐Ÿ† Leaderboard"):
            leaderboard = init_leaderboard(LEADERBOARD_DF)
        with gr.Tab("๐Ÿƒ Benchmark Runs"):
            benchmark_runs = init_benchmark_runs(BENCHMARK_RUN_DF)
        with gr.Tab("๐Ÿ“ About"):
            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

# Schedule restart_space every 1800 seconds; the dataframes above are only
# loaded once, at import time.
scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, "interval", seconds=1800)
scheduler.start()
demo.queue(default_concurrency_limit=40).launch()