# Danny Liu
# minor fix about the format
# 3de1276
import gradio as gr
from gradio_leaderboard import Leaderboard, ColumnFilter
import pandas as pd
from src.about import (
CITATION_BUTTON_LABEL,
CITATION_BUTTON_TEXT,
CONCLUSION_TEXT,
LLM_BENCHMARKS_TEXT,
TITLE,
)
from src.display.css_html_js import custom_css
from src.display.utils import (
BENCHMARK_COLS,
COLS,
AutoEvalColumn,
fields,
)
from src.populate import get_leaderboard_df
# Leaderboard data, computed once at import time from the configured column
# lists; it is handed to init_leaderboard() when the UI is assembled below.
# NOTE(review): presumably a pandas DataFrame (init_leaderboard reads
# `.empty` on it) -- confirm against src.populate.get_leaderboard_df.
LEADERBOARD_DF = get_leaderboard_df(COLS, BENCHMARK_COLS)
def init_leaderboard(dataframe):
    """Create the read-only ``Leaderboard`` component for ``dataframe``.

    Column datatypes and the list of hidden columns are derived from the
    fields of ``AutoEvalColumn``. Search is restricted to the model column
    and a checkbox-group filter is offered on the model-type column.

    Args:
        dataframe: Table to display. Must expose an ``.empty`` attribute
            (presumably a pandas DataFrame -- confirm against the caller).

    Returns:
        The configured ``Leaderboard`` instance.

    Raises:
        ValueError: If ``dataframe`` is ``None`` or reports itself as empty.
    """
    usable = dataframe is not None and not dataframe.empty
    if not usable:
        raise ValueError("Leaderboard DataFrame is empty or None.")

    # Derived per-column settings, built in the same order the component
    # receives them.
    column_types = [column.type for column in fields(AutoEvalColumn)]
    searchable = [AutoEvalColumn.model.name]
    hidden = [column.name for column in fields(AutoEvalColumn) if column.hidden]
    model_type_filter = ColumnFilter(
        AutoEvalColumn.model_type.name,
        type="checkboxgroup",
        label="Model types",
    )

    return Leaderboard(
        value=dataframe,
        datatype=column_types,
        search_columns=searchable,
        hide_columns=hidden,
        filter_columns=[model_type_filter],
        bool_checkboxgroup_label="Hide models",
        interactive=False,
    )
# Page assembly: header, benchmark leaderboard, evaluation results,
# transition-matrix figures, and a collapsible citation box.
# NOTE(review): nesting was reconstructed from a flattened listing; this
# assumes only the leaderboard sits inside the "leaderboard-container"
# column -- confirm against the deployed layout.
with gr.Blocks(css=custom_css) as demo:
    # Header section.
    gr.HTML(TITLE)
    gr.Image(
        "taxonomy_overview.png",
        elem_id="taxonomy-img",
        show_label=False,
        show_download_button=False,
    )
    gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

    # Benchmark table.
    gr.Markdown("## Benchmark")
    gr.Markdown(
        "### Model evaluation on VerilogEval-Human V1 benchmark (156 problems, 10 rollouts each)"
    )
    with gr.Column(elem_id="leaderboard-container"):
        leaderboard = init_leaderboard(LEADERBOARD_DF)

    # Written conclusions.
    gr.Markdown("### Evaluation Results")
    gr.Markdown(CONCLUSION_TEXT, elem_classes="markdown-text")

    # Transition-matrix figures, placed in one row (SFT image first).
    gr.Markdown("### Transition Matrices")
    gr.Markdown(
        "The transition matrices below show how errors evolve during the SFT and RL phases, revealing the surface convergence gap where optimization reduces syntax errors but increases functional testbench failures."
    )
    with gr.Row():
        for _matrix_png in (
            "subq1_sft_transition_matrix.png",
            "subq1_transition_matrix.png",
        ):
            gr.Image(_matrix_png, show_label=False, show_download_button=False)

    # Citation text in an accordion that starts collapsed.
    with gr.Row(), gr.Accordion("📙 Citation", open=False):
        citation_button = gr.Textbox(
            value=CITATION_BUTTON_TEXT,
            label=CITATION_BUTTON_LABEL,
            lines=20,
            elem_id="citation-button",
            show_copy_button=True,
        )

# Enable the request queue and start the app as soon as the module runs.
demo.queue(default_concurrency_limit=40).launch()