Spaces:
Running
Running
Danny Liu committed on
Commit ·
7c6ad47
1
Parent(s): 3a69eb0
keep debugging v4
Browse files
app.py
CHANGED
|
@@ -44,9 +44,11 @@ with demo:
|
|
| 44 |
|
| 45 |
gr.Markdown("## Benchmark")
|
| 46 |
gr.Markdown("### Model evaluation on VerilogEval-Human V1 benchmark (156 problems, 10 rollouts each)")
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
gr.
|
|
|
|
|
|
|
| 50 |
|
| 51 |
gr.Markdown("### Transition Matrices")
|
| 52 |
gr.Markdown("The transition matrices below show how errors evolve during the SFT and RL phases, revealing the surface convergence gap where optimization reduces syntax errors but increases functional testbench failures.")
|
|
|
|
| 44 |
|
| 45 |
gr.Markdown("## Benchmark")
|
| 46 |
gr.Markdown("### Model evaluation on VerilogEval-Human V1 benchmark (156 problems, 10 rollouts each)")
|
| 47 |
+
with gr.Row():
|
| 48 |
+
leaderboard = init_leaderboard(LEADERBOARD_DF)
|
| 49 |
+
with gr.Row():
|
| 50 |
+
gr.Markdown("### Evaluation Results")
|
| 51 |
+
gr.Markdown(CONCLUSION_TEXT, elem_classes="markdown-text")
|
| 52 |
|
| 53 |
gr.Markdown("### Transition Matrices")
|
| 54 |
gr.Markdown("The transition matrices below show how errors evolve during the SFT and RL phases, revealing the surface convergence gap where optimization reduces syntax errors but increases functional testbench failures.")
|