Spaces:
Running
Running
fix
Browse files
app.py
CHANGED
|
@@ -84,11 +84,11 @@ print(f'Term length dataframe is {term_length_df}')
|
|
| 84 |
variate_type_df = pivot_df('results/grouped_results_by_univariate.csv', tab_name='univariate')
|
| 85 |
print(f'Variate type dataframe is {variate_type_df}')
|
| 86 |
|
| 87 |
-
(
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
|
| 92 |
|
| 93 |
|
| 94 |
def init_leaderboard(dataframe):
|
|
@@ -150,51 +150,51 @@ with demo:
|
|
| 150 |
with gr.TabItem("π About", elem_id="llm-benchmark-tab-table", id=4):
|
| 151 |
gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
|
| 152 |
|
| 153 |
-
with gr.TabItem("π Submit here! ", elem_id="llm-benchmark-tab-table", id=5):
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
|
| 199 |
# with gr.Row():
|
| 200 |
# with gr.Column():
|
|
|
|
| 84 |
variate_type_df = pivot_df('results/grouped_results_by_univariate.csv', tab_name='univariate')
|
| 85 |
print(f'Variate type dataframe is {variate_type_df}')
|
| 86 |
|
| 87 |
+
# (
|
| 88 |
+
# finished_eval_queue_df,
|
| 89 |
+
# running_eval_queue_df,
|
| 90 |
+
# pending_eval_queue_df,
|
| 91 |
+
# ) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
|
| 92 |
|
| 93 |
|
| 94 |
def init_leaderboard(dataframe):
|
|
|
|
| 150 |
with gr.TabItem("π About", elem_id="llm-benchmark-tab-table", id=4):
|
| 151 |
gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
|
| 152 |
|
| 153 |
+
# with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=5):
|
| 154 |
+
# with gr.Column():
|
| 155 |
+
# with gr.Row():
|
| 156 |
+
# gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
|
| 157 |
+
#
|
| 158 |
+
# with gr.Column():
|
| 159 |
+
# with gr.Accordion(
|
| 160 |
+
# f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",
|
| 161 |
+
# open=False,
|
| 162 |
+
# ):
|
| 163 |
+
# with gr.Row():
|
| 164 |
+
# finished_eval_table = gr.components.Dataframe(
|
| 165 |
+
# value=finished_eval_queue_df,
|
| 166 |
+
# headers=EVAL_COLS,
|
| 167 |
+
# datatype=EVAL_TYPES,
|
| 168 |
+
# row_count=5,
|
| 169 |
+
# )
|
| 170 |
+
# with gr.Accordion(
|
| 171 |
+
# f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})",
|
| 172 |
+
# open=False,
|
| 173 |
+
# ):
|
| 174 |
+
# with gr.Row():
|
| 175 |
+
# running_eval_table = gr.components.Dataframe(
|
| 176 |
+
# value=running_eval_queue_df,
|
| 177 |
+
# headers=EVAL_COLS,
|
| 178 |
+
# datatype=EVAL_TYPES,
|
| 179 |
+
# row_count=5,
|
| 180 |
+
# )
|
| 181 |
+
#
|
| 182 |
+
# with gr.Accordion(
|
| 183 |
+
# f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
|
| 184 |
+
# open=False,
|
| 185 |
+
# ):
|
| 186 |
+
# with gr.Row():
|
| 187 |
+
# pending_eval_table = gr.components.Dataframe(
|
| 188 |
+
# value=pending_eval_queue_df,
|
| 189 |
+
# headers=EVAL_COLS,
|
| 190 |
+
# datatype=EVAL_TYPES,
|
| 191 |
+
# row_count=5,
|
| 192 |
+
# )
|
| 193 |
+
# with gr.Row():
|
| 194 |
+
# gr.Markdown("# ✉️✨ Submit your model outputs !", elem_classes="markdown-text")
|
| 195 |
+
# gr.Markdown(
|
| 196 |
+
# "Send your model outputs for all the models using the ContextualBench code and email them to us at xnguyen@salesforce.com ",
|
| 197 |
+
# elem_classes="markdown-text")
|
| 198 |
|
| 199 |
# with gr.Row():
|
| 200 |
# with gr.Column():
|