"""Gradio app: builds the leaderboard page and launches it."""

import gradio as gr
from gradio_leaderboard import Leaderboard, ColumnFilter
import pandas as pd

from src.about import (
    CITATION_BUTTON_LABEL,
    CITATION_BUTTON_TEXT,
    CONCLUSION_TEXT,
    LLM_BENCHMARKS_TEXT,
    TITLE,
)
from src.display.css_html_js import custom_css
from src.display.utils import (
    BENCHMARK_COLS,
    COLS,
    AutoEvalColumn,
    fields,
)
from src.populate import get_leaderboard_df

# Built once at import time and handed to the leaderboard component below.
LEADERBOARD_DF = get_leaderboard_df(COLS, BENCHMARK_COLS)


def init_leaderboard(dataframe):
    """Wrap *dataframe* in a read-only ``Leaderboard`` component.

    Args:
        dataframe: Table to display; must expose ``.empty``
            (presumably a pandas DataFrame -- confirm against
            ``get_leaderboard_df``).

    Returns:
        The configured ``Leaderboard`` instance.

    Raises:
        ValueError: If *dataframe* is ``None`` or has no rows/columns.
    """
    if dataframe is None:
        raise ValueError("Leaderboard DataFrame is empty or None.")
    if dataframe.empty:
        raise ValueError("Leaderboard DataFrame is empty or None.")

    # Column metadata is derived from the AutoEvalColumn field descriptors.
    column_types = [col.type for col in fields(AutoEvalColumn)]
    searchable = [AutoEvalColumn.model.name]
    hidden_names = [col.name for col in fields(AutoEvalColumn) if col.hidden]
    model_type_filter = ColumnFilter(
        AutoEvalColumn.model_type.name,
        type="checkboxgroup",
        label="Model types",
    )

    return Leaderboard(
        value=dataframe,
        datatype=column_types,
        search_columns=searchable,
        hide_columns=hidden_names,
        filter_columns=[model_type_filter],
        bool_checkboxgroup_label="Hide models",
        interactive=False,
    )


with gr.Blocks(css=custom_css) as demo:
    # --- Header and introduction -------------------------------------
    gr.HTML(TITLE)
    gr.Image(
        "taxonomy_overview.png",
        elem_id="taxonomy-img",
        show_label=False,
        show_download_button=False,
    )
    gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

    # --- Leaderboard table -------------------------------------------
    gr.Markdown("## Benchmark")
    gr.Markdown(
        "### Model evaluation on VerilogEval-Human V1 benchmark (156 problems, 10 rollouts each)"
    )
    with gr.Column(elem_id="leaderboard-container"):
        leaderboard = init_leaderboard(LEADERBOARD_DF)

    # --- Written conclusions -----------------------------------------
    gr.Markdown("### Evaluation Results")
    gr.Markdown(CONCLUSION_TEXT, elem_classes="markdown-text")

    # --- Transition matrix figures, side by side ---------------------
    gr.Markdown("### Transition Matrices")
    gr.Markdown(
        "The transition matrices below show how errors evolve during the SFT "
        "and RL phases, revealing the surface convergence gap where "
        "optimization reduces syntax errors but increases functional "
        "testbench failures."
    )
    with gr.Row():
        for matrix_png in (
            "subq1_sft_transition_matrix.png",
            "subq1_transition_matrix.png",
        ):
            gr.Image(matrix_png, show_label=False, show_download_button=False)

    # --- Collapsed citation box --------------------------------------
    with gr.Row(), gr.Accordion("📙 Citation", open=False):
        citation_button = gr.Textbox(
            value=CITATION_BUTTON_TEXT,
            label=CITATION_BUTTON_LABEL,
            lines=20,
            elem_id="citation-button",
            show_copy_button=True,
        )

# Enable request queuing, then start the server.
demo.queue(default_concurrency_limit=40).launch()