Commit b233f03 · Parent(s): 4ae29ac
Danny Liu committed

Add PNGs with LFS tracking
Files changed:
- .gitattributes +1 -0
- app.py +12 -13
- src/about.py +1 -1
- src/display/utils.py +1 -2
- src/populate.py +3 -4
- subq1_sft_transition_matrix.png +3 -0
- subq1_transition_matrix.png +3 -0
- taxonomy_overview.png +3 -0
.gitattributes
CHANGED

@@ -32,4 +32,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.png filter=lfs diff=lfs merge=lfs -text
 scale-hf-logo.png filter=lfs diff=lfs merge=lfs -text
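The new `*.png` rule is the line that `git lfs track "*.png"` appends: every PNG committed from now on (including the three added below) is stored as an LFS object, with only a small pointer file kept in the git tree. The pre-existing `scale-hf-logo.png` rule becomes redundant but is harmless alongside the glob.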
app.py
CHANGED

@@ -1,5 +1,5 @@
 import gradio as gr
-from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
+from gradio_leaderboard import Leaderboard, ColumnFilter
 import pandas as pd
 
 from src.about import (
@@ -26,12 +26,6 @@ def init_leaderboard(dataframe):
     return Leaderboard(
         value=dataframe,
         datatype=[c.type for c in fields(AutoEvalColumn)],
-        select_columns=SelectColumns(
-            default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
-            cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden],
-            label="Select Columns to Display:",
-        ),
-        search_columns=[AutoEvalColumn.model.name, AutoEvalColumn.license.name],
         hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
         filter_columns=[
             ColumnFilter(AutoEvalColumn.model_type.name, type="checkboxgroup", label="Model types"),
@@ -44,13 +38,18 @@ demo = gr.Blocks(css=custom_css)
 with demo:
     gr.HTML(TITLE)
     gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
+
+    gr.Image("taxonomy_overview.png", show_label=False, show_download_button=False)
+    gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
 
-
-
-
-
-
-
+    gr.Markdown("### Model evaluation on VerilogEval-Human V1 benchmark (156 problems, 10 rollouts each)")
+    leaderboard = init_leaderboard(LEADERBOARD_DF)
+
+    gr.Markdown("### Transition Matrices")
+    gr.Markdown("The transition matrices below show how errors evolve during the SFT and RL phases, revealing the surface convergence gap where optimization reduces syntax errors but increases functional testbench failures.")
+    with gr.Row():
+        gr.Image("subq1_sft_transition_matrix.png", label="SFT Transition Matrix", show_label=True, show_download_button=False)
+        gr.Image("subq1_transition_matrix.png", label="RL Transition Matrix", show_label=True, show_download_button=False)
 
     with gr.Row():
         with gr.Accordion("📙 Citation", open=False):
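For orientation, the new UI flow can be previewed in isolation. A minimal sketch, assuming the three PNGs sit in the working directory; the placeholder markdown stands in for TITLE, INTRODUCTION_TEXT, LLM_BENCHMARKS_TEXT, and the leaderboard component, which the real app imports from src.about and src.populate:

import gradio as gr

with gr.Blocks() as demo:
    gr.Markdown("Intro placeholder")  # INTRODUCTION_TEXT in the real app
    # Full-width figure, no caption and no download button, as in app.py
    gr.Image("taxonomy_overview.png", show_label=False, show_download_button=False)
    gr.Markdown("### Transition Matrices")
    with gr.Row():  # two labeled figures side by side
        gr.Image("subq1_sft_transition_matrix.png", label="SFT Transition Matrix",
                 show_label=True, show_download_button=False)
        gr.Image("subq1_transition_matrix.png", label="RL Transition Matrix",
                 show_label=True, show_download_button=False)

if __name__ == "__main__":
    demo.launch()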
src/about.py
CHANGED

@@ -24,7 +24,7 @@ NUM_FEWSHOT = 0 # Change with your few shot
 
 
 # Your leaderboard name
-TITLE = """<h1 align="center" id="space-title">RTL
+TITLE = """<h1 align="center" id="space-title">How LLMs Fail and Generalize in RTL Coding for Hardware Design?</h1>"""
 
 # What does your leaderboard evaluate?
 INTRODUCTION_TEXT = """
src/display/utils.py
CHANGED

@@ -23,13 +23,12 @@ class ColumnContent:
 ## Leaderboard columns
 auto_eval_column_dict = []
 # Init
-auto_eval_column_dict.append(["model_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)])
 auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
 #Scores
 for task in Tasks:
     auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
 # Model information
-auto_eval_column_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", False)])
+auto_eval_column_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", True)])
 auto_eval_column_dict.append(["params", ColumnContent, ColumnContent("#Params (B)", "number", False)])
 auto_eval_column_dict.append(["license", ColumnContent, ColumnContent("Hub License", "str", False, hidden=True)])
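Context for this edit, assumed from the stock Hugging Face leaderboard template rather than shown in the diff: the [attribute, type, default] triples are later collapsed into a frozen dataclass via make_dataclass, which is why deleting the model_type_symbol entry is enough to drop that column everywhere. A self-contained sketch:

from dataclasses import dataclass, make_dataclass

@dataclass(frozen=True)
class ColumnContent:
    name: str
    type: str
    displayed_by_default: bool
    hidden: bool = False
    never_hidden: bool = False

# Each triple becomes one attribute of the generated class.
auto_eval_column_dict = [
    ["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)],
    ["model_type", ColumnContent, ColumnContent("Type", "str", True)],
]
AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)

print(AutoEvalColumn.model_type.name)  # -> Type (the display name app.py filters on)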
src/populate.py
CHANGED

@@ -10,10 +10,10 @@ def get_leaderboard_df(cols: list, benchmark_cols: list) -> pd.DataFrame:
     df = pd.read_csv(csv_path)
 
     # Add model_type_symbol based on model_type
-    def
-        return ModelType.from_str(str(mtype)).
+    def get_type_with_symbol(mtype):
+        return ModelType.from_str(str(mtype)).to_str()
 
-    df["
+    df["model_type"] = df["model_type"].apply(get_type_with_symbol)
 
     # Sort by pass_rate
     if "pass_rate" in df.columns:
@@ -21,7 +21,6 @@ def get_leaderboard_df(cols: list, benchmark_cols: list) -> pd.DataFrame:
 
     # Rename columns to match the expected names in AutoEvalColumn
     rename_map = {
-        "model_type_symbol": AutoEvalColumn.model_type_symbol.name,
         "model": AutoEvalColumn.model.name,
         "model_type": AutoEvalColumn.model_type.name,
         "params": AutoEvalColumn.params.name,
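The net effect of this change: rather than keeping a separate model_type_symbol column, the type symbol is folded into the model_type string itself. A runnable mock of the transformation — the ModelType enum below is a hypothetical stand-in for the real one in src.display.utils, which this diff does not show:

import pandas as pd
from enum import Enum

class ModelType(Enum):
    # (name, symbol) pairs; stand-in values for illustration only
    PRETRAINED = ("pretrained", "🟢")
    FINETUNED = ("fine-tuned", "🔶")

    @classmethod
    def from_str(cls, s: str) -> "ModelType":
        for m in cls:
            if m.value[0] == s.lower():
                return m
        raise ValueError(f"unknown model type: {s}")

    def to_str(self) -> str:
        name, symbol = self.value
        return f"{symbol} {name}"

def get_type_with_symbol(mtype):
    return ModelType.from_str(str(mtype)).to_str()

df = pd.DataFrame({"model": ["a", "b"], "model_type": ["pretrained", "fine-tuned"]})
df["model_type"] = df["model_type"].apply(get_type_with_symbol)
print(df["model_type"].tolist())  # ['🟢 pretrained', '🔶 fine-tuned']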
subq1_sft_transition_matrix.png
ADDED
Git LFS Details

subq1_transition_matrix.png
ADDED
Git LFS Details

taxonomy_overview.png
ADDED
Git LFS Details
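Each image is listed as +3 -0 because only a three-line LFS pointer enters the git tree; the binary itself goes to LFS storage. A pointer file has this shape (the oid digest and size here are placeholders):

version https://git-lfs.github.com/spec/v1
oid sha256:<64-character-hex-digest>
size <file-size-in-bytes>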