Danny Liu committed
Commit b233f03 · Parent: 4ae29ac

Add PNGs with LFS tracking

.gitattributes CHANGED
@@ -32,4 +32,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.png filter=lfs diff=lfs merge=lfs -text
 scale-hf-logo.png filter=lfs diff=lfs merge=lfs -text
app.py CHANGED
@@ -1,5 +1,5 @@
 import gradio as gr
-from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
+from gradio_leaderboard import Leaderboard, ColumnFilter
 import pandas as pd
 
 from src.about import (
@@ -26,12 +26,6 @@ def init_leaderboard(dataframe):
     return Leaderboard(
         value=dataframe,
         datatype=[c.type for c in fields(AutoEvalColumn)],
-        select_columns=SelectColumns(
-            default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
-            cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden],
-            label="Select Columns to Display:",
-        ),
-        search_columns=[AutoEvalColumn.model.name, AutoEvalColumn.license.name],
         hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
         filter_columns=[
             ColumnFilter(AutoEvalColumn.model_type.name, type="checkboxgroup", label="Model types"),
@@ -44,13 +38,18 @@ demo = gr.Blocks(css=custom_css)
 with demo:
     gr.HTML(TITLE)
     gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
+
+    gr.Image("taxonomy_overview.png", show_label=False, show_download_button=False)
+    gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
 
-    with gr.Tabs(elem_classes="tab-buttons") as tabs:
-        with gr.TabItem("🏅 RTL Models Benchmark", elem_id="llm-benchmark-tab-table", id=0):
-            leaderboard = init_leaderboard(LEADERBOARD_DF)
-
-        with gr.TabItem("📝 Taxonomy & About", elem_id="llm-benchmark-tab-table", id=1):
-            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
+    gr.Markdown("### Model evaluation on VerilogEval-Human V1 benchmark (156 problems, 10 rollouts each)")
+    leaderboard = init_leaderboard(LEADERBOARD_DF)
+
+    gr.Markdown("### Transition Matrices")
+    gr.Markdown("The transition matrices below show how errors evolve during the SFT and RL phases, revealing the surface convergence gap where optimization reduces syntax errors but increases functional testbench failures.")
+    with gr.Row():
+        gr.Image("subq1_sft_transition_matrix.png", label="SFT Transition Matrix", show_label=True, show_download_button=False)
+        gr.Image("subq1_transition_matrix.png", label="RL Transition Matrix", show_label=True, show_download_button=False)
 
     with gr.Row():
         with gr.Accordion("📙 Citation", open=False):
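
The UI rework flattens the old two-tab layout into one scrolling page: taxonomy figure, about text, leaderboard table, then the two transition-matrix plots side by side. A minimal sketch of that flat structure, runnable with stock Gradio alone (gr.Dataframe and the two-row DataFrame below are stand-ins for the gradio_leaderboard component and the real LEADERBOARD_DF, which this commit does not show):

    import gradio as gr
    import pandas as pd

    # Stand-in leaderboard data; the real app builds LEADERBOARD_DF in src/populate.py.
    df = pd.DataFrame({"Model": ["model-a", "model-b"], "pass_rate": [0.61, 0.47]})

    with gr.Blocks() as demo:
        gr.HTML("<h1>How LLMs Fail and Generalize in RTL Coding for Hardware Design?</h1>")
        gr.Image("taxonomy_overview.png", show_label=False, show_download_button=False)
        gr.Markdown("### Model evaluation on VerilogEval-Human V1 benchmark")
        gr.Dataframe(df)  # stand-in for init_leaderboard(LEADERBOARD_DF)
        with gr.Row():  # gr.Row lays its children out side by side
            gr.Image("subq1_sft_transition_matrix.png", label="SFT Transition Matrix")
            gr.Image("subq1_transition_matrix.png", label="RL Transition Matrix")

    demo.launch()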
src/about.py CHANGED
@@ -24,7 +24,7 @@ NUM_FEWSHOT = 0 # Change with your few shot
 
 
 # Your leaderboard name
-TITLE = """<h1 align="center" id="space-title">RTL Error Analysis Leaderboard</h1>"""
+TITLE = """<h1 align="center" id="space-title">How LLMs Fail and Generalize in RTL Coding for Hardware Design?</h1>"""
 
 # What does your leaderboard evaluate?
 INTRODUCTION_TEXT = """
src/display/utils.py CHANGED
@@ -23,13 +23,12 @@ class ColumnContent:
 ## Leaderboard columns
 auto_eval_column_dict = []
 # Init
-auto_eval_column_dict.append(["model_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)])
 auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
 #Scores
 for task in Tasks:
     auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
 # Model information
-auto_eval_column_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", False)])
+auto_eval_column_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", True)])
 auto_eval_column_dict.append(["params", ColumnContent, ColumnContent("#Params (B)", "number", False)])
 auto_eval_column_dict.append(["license", ColumnContent, ColumnContent("Hub License", "str", False, hidden=True)])
src/populate.py CHANGED
@@ -10,10 +10,10 @@ def get_leaderboard_df(cols: list, benchmark_cols: list) -> pd.DataFrame:
     df = pd.read_csv(csv_path)
 
     # Add model_type_symbol based on model_type
-    def get_symbol(mtype):
-        return ModelType.from_str(str(mtype)).value.symbol
+    def get_type_with_symbol(mtype):
+        return ModelType.from_str(str(mtype)).to_str()
 
-    df["model_type_symbol"] = df["model_type"].apply(get_symbol)
+    df["model_type"] = df["model_type"].apply(get_type_with_symbol)
 
     # Sort by pass_rate
     if "pass_rate" in df.columns:
@@ -21,7 +21,6 @@ def get_leaderboard_df(cols: list, benchmark_cols: list) -> pd.DataFrame:
 
     # Rename columns to match the expected names in AutoEvalColumn
     rename_map = {
-        "model_type_symbol": AutoEvalColumn.model_type_symbol.name,
         "model": AutoEvalColumn.model.name,
        "model_type": AutoEvalColumn.model_type.name,
         "params": AutoEvalColumn.params.name,
subq1_sft_transition_matrix.png ADDED

Git LFS Details

  • SHA256: 6015ca0f440183452a8da26552f32d497897c4badba4d6c049790f32335a5932
  • Pointer size: 131 Bytes
  • Size of remote file: 141 kB
subq1_transition_matrix.png ADDED

Git LFS Details

  • SHA256: 6da070fd2e2ff01a6f022039b9e05a7efce8d45d0a88104618f60b027ae2c59f
  • Pointer size: 131 Bytes
  • Size of remote file: 132 kB
taxonomy_overview.png ADDED

Git LFS Details

  • SHA256: 451558ba0027d0f394d348a8c7c9e7c15ba9ec6dd9b578c8252c5a9c6b4b0786
  • Pointer size: 131 Bytes
  • Size of remote file: 618 kB
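
For reference, what actually gets committed for each of these PNGs is a ~131-byte text pointer holding the spec URL, the SHA256 listed above, and the file's byte count; the image bytes live on the LFS server. A small sketch that parses such a pointer (the size value is illustrative, since the page rounds the real one to 141 kB):

    def parse_lfs_pointer(text: str) -> dict:
        # A pointer file is three "key value" lines.
        fields = dict(line.split(" ", 1) for line in text.strip().splitlines())
        return {
            "spec": fields["version"],
            "sha256": fields["oid"].split(":", 1)[1],
            "bytes": int(fields["size"]),
        }

    pointer = "\n".join([
        "version https://git-lfs.github.com/spec/v1",
        # oid copied from the first PNG above; the size is an illustrative byte count.
        "oid sha256:6015ca0f440183452a8da26552f32d497897c4badba4d6c049790f32335a5932",
        "size 141000",
    ])
    print(parse_lfs_pointer(pointer))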