Spaces:
Runtime error
Runtime error
christodoulos.constantinides@ibm.com committed on
Commit ·
c40ac63
1
Parent(s): d4d8b2d
update
Browse files- app.py +10 -0
- src/about.py +5 -1
app.py
CHANGED
|
@@ -99,6 +99,16 @@ with demo:
|
|
| 99 |
with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
|
| 100 |
leaderboard = init_leaderboard(LEADERBOARD_DF)
|
| 101 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
|
| 103 |
gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
|
| 104 |
|
|
|
|
| 99 |
with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
|
| 100 |
leaderboard = init_leaderboard(LEADERBOARD_DF)
|
| 101 |
|
| 102 |
+
with gr.TabItem("📊 Performance Plot", elem_id="llm-benchmark-tab-table", id=1):
|
| 103 |
+
gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
|
| 104 |
+
print(LEADERBOARD_DF)
|
| 105 |
+
# with gr.Row():
|
| 106 |
+
# bs_1_plot = gr.components.Plot(
|
| 107 |
+
# value=plot_throughput(LEADERBOARD_DF, bs=1),
|
| 108 |
+
# elem_id="bs1-plot",
|
| 109 |
+
# show_label=False,
|
| 110 |
+
# )
|
| 111 |
+
|
| 112 |
with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
|
| 113 |
gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
|
| 114 |
|
src/about.py
CHANGED
|
@@ -33,7 +33,7 @@ Intro text
|
|
| 33 |
|
| 34 |
# Which evaluations are you running? how can people reproduce what you have?
|
| 35 |
LLM_BENCHMARKS_TEXT = '''
|
| 36 |
-
##
|
| 37 |
The prompt will follow the following style. Models' output are expected to follow this format.
|
| 38 |
```
|
| 39 |
Select the correct option(s) from the following options given the question. To solve the problem, follow the Let's think Step by Step reasoning strategy.
|
|
@@ -47,6 +47,10 @@ E voltage
|
|
| 47 |
{"step_1": "<Step 1 of your reasoning>", "step_2": "<Step 2 of your reasoning>", "step_n": "<Step n of your reasoning>", "answer": <the list of selected option, e.g., ["A", "B", "C", "D", "E"]>}
|
| 48 |
Your output in a single line:
|
| 49 |
```
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
## Reproducibility
|
| 51 |
To reproduce our results, here is the commands you can run:
|
| 52 |
|
|
|
|
| 33 |
|
| 34 |
# Which evaluations are you running? how can people reproduce what you have?
|
| 35 |
LLM_BENCHMARKS_TEXT = '''
|
| 36 |
+
## Prompt Format
|
| 37 |
The prompt will follow the following style. Models' output are expected to follow this format.
|
| 38 |
```
|
| 39 |
Select the correct option(s) from the following options given the question. To solve the problem, follow the Let's think Step by Step reasoning strategy.
|
|
|
|
| 47 |
{"step_1": "<Step 1 of your reasoning>", "step_2": "<Step 2 of your reasoning>", "step_n": "<Step n of your reasoning>", "answer": <the list of selected option, e.g., ["A", "B", "C", "D", "E"]>}
|
| 48 |
Your output in a single line:
|
| 49 |
```
|
| 50 |
+
## Expected Output Format
|
| 51 |
+
```
|
| 52 |
+
{"step_1": "<Step 1 of your reasoning>", "step_2": "<Step 2 of your reasoning>", "step_n": "<Step n of your reasoning>", "answer": <the list of selected option, e.g., ["A", "B", "C", "D", "E"]>}
|
| 53 |
+
```
|
| 54 |
## Reproducibility
|
| 55 |
To reproduce our results, here is the commands you can run:
|
| 56 |
|