File size: 2,546 Bytes
e148f6b
b233f03
e148f6b
 
 
 
 
26417d8
e148f6b
 
 
 
 
 
 
 
 
 
0e8f5d6
e148f6b
0e8f5d6
e148f6b
 
 
 
 
 
 
4e5862b
 
e148f6b
 
 
4e5862b
e148f6b
 
 
 
 
 
b233f03
3de1276
 
9a205f0
 
b233f03
10f2787
7c6ad47
10f2787
 
b233f03
 
 
 
10f2787
 
e148f6b
 
 
 
 
 
 
 
 
 
 
0e8f5d6
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import gradio as gr
from gradio_leaderboard import Leaderboard, ColumnFilter
import pandas as pd

from src.about import (
    CITATION_BUTTON_LABEL,
    CITATION_BUTTON_TEXT,
    CONCLUSION_TEXT,
    LLM_BENCHMARKS_TEXT,
    TITLE,
)
from src.display.css_html_js import custom_css
from src.display.utils import (
    BENCHMARK_COLS,
    COLS,
    AutoEvalColumn,
    fields,
)
from src.populate import get_leaderboard_df

# Leaderboard data, built once when this module is executed from the imported
# COLS / BENCHMARK_COLS definitions; it is handed to init_leaderboard() when
# the UI below is assembled.
LEADERBOARD_DF = get_leaderboard_df(COLS, BENCHMARK_COLS)

def init_leaderboard(dataframe):
    """Create the read-only ``Leaderboard`` component for *dataframe*.

    Column datatypes and the list of hidden columns are derived from the
    fields of ``AutoEvalColumn``; searching uses the model column and a
    checkbox-group filter is offered on the model-type column.

    Args:
        dataframe: The leaderboard data to display.

    Returns:
        The configured ``Leaderboard`` component (``interactive=False``).

    Raises:
        ValueError: If *dataframe* is ``None`` or has no content.
    """
    has_content = dataframe is not None and not dataframe.empty
    if not has_content:
        raise ValueError("Leaderboard DataFrame is empty or None.")

    # Derive the per-column configuration from the AutoEvalColumn schema.
    column_types = [col.type for col in fields(AutoEvalColumn)]
    searchable = [AutoEvalColumn.model.name]
    hidden_names = [col.name for col in fields(AutoEvalColumn) if col.hidden]
    model_type_filter = ColumnFilter(
        AutoEvalColumn.model_type.name,
        type="checkboxgroup",
        label="Model types",
    )

    return Leaderboard(
        value=dataframe,
        datatype=column_types,
        search_columns=searchable,
        hide_columns=hidden_names,
        filter_columns=[model_type_filter],
        bool_checkboxgroup_label="Hide models",
        interactive=False,
    )

# Keyword arguments shared by every static image on the page: no caption
# label and no download button.
_image_opts = {"show_label": False, "show_download_button": False}

# Assemble the page top to bottom: title, taxonomy overview, benchmark
# leaderboard, evaluation summary, transition matrices, citation box.
with gr.Blocks(css=custom_css) as demo:
    gr.HTML(TITLE)

    gr.Image("taxonomy_overview.png", elem_id="taxonomy-img", **_image_opts)
    gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

    # --- Benchmark leaderboard -------------------------------------------
    gr.Markdown("## Benchmark")
    gr.Markdown(
        "### Model evaluation on VerilogEval-Human V1 benchmark "
        "(156 problems, 10 rollouts each)"
    )
    with gr.Column(elem_id="leaderboard-container"):
        leaderboard = init_leaderboard(LEADERBOARD_DF)

    # --- Written conclusions ---------------------------------------------
    gr.Markdown("### Evaluation Results")
    gr.Markdown(CONCLUSION_TEXT, elem_classes="markdown-text")

    # --- Transition matrices (two images placed in one row) --------------
    gr.Markdown("### Transition Matrices")
    gr.Markdown(
        "The transition matrices below show how errors evolve during the SFT and RL phases, "
        "revealing the surface convergence gap where optimization reduces syntax errors "
        "but increases functional testbench failures."
    )
    with gr.Row():
        for _matrix_png in (
            "subq1_sft_transition_matrix.png",
            "subq1_transition_matrix.png",
        ):
            gr.Image(_matrix_png, **_image_opts)

    # --- Collapsible citation box ----------------------------------------
    with gr.Row(), gr.Accordion("📙 Citation", open=False):
        citation_button = gr.Textbox(
            value=CITATION_BUTTON_TEXT,
            label=CITATION_BUTTON_LABEL,
            lines=20,
            elem_id="citation-button",
            show_copy_button=True,
        )

# Enable request queuing with the configured concurrency limit, then start
# serving the app.
_queued_demo = demo.queue(default_concurrency_limit=40)
_queued_demo.launch()