Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns | |
| import pandas as pd | |
| from apscheduler.schedulers.background import BackgroundScheduler | |
| from huggingface_hub import snapshot_download | |
| from src.about import ( | |
| CITATION_BUTTON_LABEL, | |
| CITATION_BUTTON_TEXT, | |
| EVALUATION_QUEUE_TEXT, | |
| INTRODUCTION_TEXT, | |
| LLM_BENCHMARKS_TEXT, | |
| TITLE, | |
| ) | |
| from src.display.css_html_js import custom_css | |
| from src.display.utils import ( | |
| COLS, | |
| ST_BENCHMARK_COLS, | |
| AGENTIC_BENCHMARK_COLS, | |
| EVAL_COLS, | |
| EVAL_TYPES, | |
| AutoEvalColumn, | |
| ModelType, | |
| fields, | |
| WeightType, | |
| Precision | |
| ) | |
| from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN | |
| from src.populate import get_evaluation_queue_df, get_leaderboard_df, TASK_NAME_INVERSE_MAP | |
| from src.submission.submit import add_new_eval | |
def restart_space():
    """Restart the Space identified by ``REPO_ID`` through the shared ``API`` client.

    Used as the failure handler for the start-up dataset downloads and as the
    job run periodically by the background scheduler.
    """
    API.restart_space(repo_id=REPO_ID)
### Space initialisation
# Download the request-queue and results datasets into their local
# directories. A failed download still triggers a Space restart (the original
# best-effort policy), but the error is printed first so the reason for the
# restart is visible in the Space logs instead of being silently discarded.
for _repo_id, _local_dir in (
    (QUEUE_REPO, EVAL_REQUESTS_PATH),
    (RESULTS_REPO, EVAL_RESULTS_PATH),
):
    try:
        print(_local_dir)
        snapshot_download(
            repo_id=_repo_id,
            local_dir=_local_dir,
            repo_type="dataset",
            tqdm_class=None,
            etag_timeout=30,
            token=TOKEN,
        )
    except Exception as err:
        print(f"snapshot_download failed for {_repo_id}: {err!r}")
        restart_space()
# One leaderboard table per benchmark family, built from the same results and
# requests directories but with a different set of benchmark columns.
ST_LEADERBOARD_DF = get_leaderboard_df(
    EVAL_RESULTS_PATH,
    EVAL_REQUESTS_PATH,
    COLS,
    ST_BENCHMARK_COLS,
)
AGENTIC_LEADERBOARD_DF = get_leaderboard_df(
    EVAL_RESULTS_PATH,
    EVAL_REQUESTS_PATH,
    COLS,
    AGENTIC_BENCHMARK_COLS,
)

# The evaluation queue is returned as three values, unpacked in this order.
finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = (
    get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
)
def init_leaderboard(dataframe, benchmark_type):
    """Build the Gradio table component for one benchmark family.

    Args:
        dataframe: Leaderboard data to display.
        benchmark_type: Value compared against the ``"type"`` entry of each
            column's record in ``TASK_NAME_INVERSE_MAP`` (the tabs in this
            file pass ``"base"`` and ``"agentic"``).

    Returns:
        A ``gr.components.Dataframe`` whose ``datatype`` and ``column_widths``
        are derived from the selected ``AutoEvalColumn`` fields.

    Raises:
        ValueError: If ``dataframe`` is ``None`` or empty.
    """
    if dataframe is None or dataframe.empty:
        raise ValueError("Leaderboard DataFrame is empty or None.")

    def belongs_to_tab(column):
        # "Model" is always kept; any other column must be registered under
        # the requested benchmark type.
        if column.name == "Model":
            return True
        task_info = TASK_NAME_INVERSE_MAP.get(column.name, {})
        return task_info.get("type", "") == benchmark_type

    selected_columns = list(filter(belongs_to_tab, fields(AutoEvalColumn)))

    datatypes = []
    widths = []
    for column in selected_columns:
        datatypes.append(column.type)
        # The model-name column gets extra room; all others share one width.
        widths.append("250px" if column.name == "Model" else "150px")

    return gr.components.Dataframe(
        value=dataframe,
        datatype=datatypes,
        column_widths=widths,
        wrap=False,
    )
# Logo image files referenced by the page header markup.
black_logo_path = "src/assets/logo-icon-black.png"
white_logo_path = "src/assets/logo-icon-white.png"

# Root Blocks container: project CSS, pink-accented default theme, and a
# layout that fills the available width and height.
demo = gr.Blocks(
    theme=gr.themes.Default(primary_hue=gr.themes.colors.pink),
    css=custom_css,
    fill_width=True,
    fill_height=True,
)
# Page header markup: the two logo images on the left, title and tagline in
# the centre, and an empty right-hand container.
header_html = f"""
<div id="page-header">
<div id="header-container">
<div id="left-container">
<img id="black-logo" src="/gradio_api/file={black_logo_path}">
<img id="white-logo" src="/gradio_api/file={white_logo_path}">
</div>
<div id="centre-container">
<h1 style="margin-bottom: 0.25rem;">{TITLE}</h1>
<p style="color:#eb088a; margin:0; font-size:1.2rem;">Performance Insights & Comparison</p>
</div>
<div id="right-container"></div>
</div>
</div>
"""

with demo:
    gr.HTML(header_html)

    # Introduction text, wrapped in its own styled group.
    with gr.Group(elem_classes="intro-block"):
        gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    # One tab per benchmark family, plus an "About" tab with the benchmark notes.
    with gr.Tabs(elem_classes=["leaderboard-table", "tab-buttons"]) as tabs:
        with gr.TabItem("Base Benchmark", elem_classes="llm-benchmark-tab-table", id=0):
            leaderboard = init_leaderboard(ST_LEADERBOARD_DF, "base")

        with gr.TabItem("Agentic Benchmark", elem_classes="llm-benchmark-tab-table", id=1):
            leaderboard = init_leaderboard(AGENTIC_LEADERBOARD_DF, "agentic")

        with gr.TabItem("About", elem_classes="llm-benchmark-tab-table", id=2):
            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
# Logo files handed to launch() via allowed_paths so the header <img> tags
# can load them.
assets = [black_logo_path, white_logo_path]

# Run restart_space every 1800 seconds (30 minutes) on a background scheduler.
scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, trigger="interval", seconds=1800)
scheduler.start()

# Enable the request queue, then start serving the app.
demo.queue(default_concurrency_limit=40)
demo.launch(allowed_paths=assets)