Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -68,7 +68,14 @@ def add_new_eval(
|
|
| 68 |
else:
|
| 69 |
content = input_file.decode("utf-8")
|
| 70 |
prediction = prediction_analyse(content)
|
| 71 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
|
| 73 |
# count for average image\video\all
|
| 74 |
total_correct_image = sum(prediction[i]["correct"] for i in range(1, 10))
|
|
@@ -77,20 +84,43 @@ def add_new_eval(
|
|
| 77 |
total_image = sum(prediction[i]["total"] for i in range(1, 10))
|
| 78 |
total_video = sum(prediction[i]["total"] for i in range(10, 13))
|
| 79 |
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
|
| 84 |
-
if LLM_type == '
|
| 85 |
LLM_name = LLM_name_textbox
|
| 86 |
else:
|
| 87 |
LLM_name = LLM_type
|
| 88 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
|
| 90 |
if model_link == '':
|
| 91 |
-
model_name =
|
| 92 |
else:
|
| 93 |
-
model_name = '[' +
|
|
|
|
| 94 |
# add new data
|
| 95 |
new_data = [
|
| 96 |
model_type,
|
|
@@ -112,22 +142,8 @@ def add_new_eval(
|
|
| 112 |
average_accuracy_video,
|
| 113 |
overall_accuracy]
|
| 114 |
# pdb.set_trace()
|
| 115 |
-
csv_data =
|
| 116 |
-
|
| 117 |
-
# pdb.set_trace()
|
| 118 |
-
if revision_name_textbox == '':
|
| 119 |
-
col = csv_data.shape[0]
|
| 120 |
-
csv_data.loc[col] = new_data
|
| 121 |
-
csv_data = csv_data.to_csv(CSV_DIR, index=False)
|
| 122 |
-
else:
|
| 123 |
-
model_name_list = csv_data['Model']
|
| 124 |
-
name_list = [name.split(']')[0][1:] for name in model_name_list]
|
| 125 |
-
if revision_name_textbox not in name_list:
|
| 126 |
-
col = csv_data.shape[0]
|
| 127 |
-
else:
|
| 128 |
-
col = name_list.index(revision_name_textbox)
|
| 129 |
-
csv_data.loc[col] = new_data
|
| 130 |
-
csv_data = csv_data.to_csv(CSV_DIR, index=False)
|
| 131 |
return 0
|
| 132 |
|
| 133 |
def get_baseline_df():
|
|
@@ -204,6 +220,8 @@ with block:
|
|
| 204 |
with gr.TabItem("🚀 Submit here! ", elem_id="seed-benchmark-tab-table", id=3):
|
| 205 |
gr.Markdown(LEADERBORAD_INTRODUCTION, elem_classes="markdown-text")
|
| 206 |
|
|
|
|
|
|
|
| 207 |
|
| 208 |
with gr.Row():
|
| 209 |
gr.Markdown("# ✉️✨ Submit your model evaluation json file here!", elem_classes="markdown-text")
|
|
@@ -235,14 +253,14 @@ with block:
|
|
| 235 |
with gr.Column():
|
| 236 |
|
| 237 |
LLM_type = gr.Dropdown(
|
| 238 |
-
choices=["Vicuna-7B", "Flan-T5-XL", "LLaMA-7B", "
|
| 239 |
label="LLM type",
|
| 240 |
multiselect=False,
|
| 241 |
value="LLaMA-7B",
|
| 242 |
interactive=True,
|
| 243 |
)
|
| 244 |
LLM_name_textbox = gr.Textbox(
|
| 245 |
-
label="LLM model (for
|
| 246 |
placeholder="LLaMA-13B"
|
| 247 |
)
|
| 248 |
Evaluation_dimension = gr.Dropdown(
|
|
|
|
| 68 |
else:
|
| 69 |
content = input_file.decode("utf-8")
|
| 70 |
prediction = prediction_analyse(content)
|
| 71 |
+
csv_data = pd.read_csv(CSV_DIR)
|
| 72 |
+
|
| 73 |
+
Start_dimension, End_dimension = 1, 13
|
| 74 |
+
if Evaluation_dimension == 'Image':
|
| 75 |
+
End_dimension = 10
|
| 76 |
+
elif Evaluation_dimension == 'Video':
|
| 77 |
+
Start_dimension = 10
|
| 78 |
+
each_task_accuracy = {i: round(prediction[i]["correct"] / prediction[i]["total"] * 100, 1) if i >= Start_dimension and i < End_dimension else 0 for i in range(1, 13)}
|
| 79 |
|
| 80 |
# count for average image\video\all
|
| 81 |
total_correct_image = sum(prediction[i]["correct"] for i in range(1, 10))
|
|
|
|
| 84 |
total_image = sum(prediction[i]["total"] for i in range(1, 10))
|
| 85 |
total_video = sum(prediction[i]["total"] for i in range(10, 13))
|
| 86 |
|
| 87 |
+
if Evaluation_dimension != 'Video':
|
| 88 |
+
average_accuracy_image = round(total_correct_image / total_image * 100, 1)
|
| 89 |
+
else:
|
| 90 |
+
average_accuracy_image = 0
|
| 91 |
+
|
| 92 |
+
if Evaluation_dimension != 'Image':
|
| 93 |
+
average_accuracy_video = round(total_correct_video / total_video * 100, 1)
|
| 94 |
+
else:
|
| 95 |
+
average_accuracy_video = 0
|
| 96 |
+
|
| 97 |
+
if Evaluation_dimension == 'All':
|
| 98 |
+
overall_accuracy = round((total_correct_image + total_correct_video) / (total_image + total_video) * 100, 1)
|
| 99 |
+
else:
|
| 100 |
+
overall_accuracy = 0
|
| 101 |
|
| 102 |
+
if LLM_type == 'Other':
|
| 103 |
LLM_name = LLM_name_textbox
|
| 104 |
else:
|
| 105 |
LLM_name = LLM_type
|
| 106 |
|
| 107 |
+
if revision_name_textbox == '':
|
| 108 |
+
col = csv_data.shape[0]
|
| 109 |
+
model_name = model_name_textbox
|
| 110 |
+
else:
|
| 111 |
+
model_name = revision_name_textbox
|
| 112 |
+
model_name_list = csv_data['Model']
|
| 113 |
+
name_list = [name.split(']')[0][1:] for name in model_name_list]
|
| 114 |
+
if revision_name_textbox not in name_list:
|
| 115 |
+
col = csv_data.shape[0]
|
| 116 |
+
else:
|
| 117 |
+
col = name_list.index(revision_name_textbox)
|
| 118 |
|
| 119 |
if model_link == '':
|
| 120 |
+
model_name = model_name # no url
|
| 121 |
else:
|
| 122 |
+
model_name = '[' + model_name + '](' + model_link + ')'
|
| 123 |
+
|
| 124 |
# add new data
|
| 125 |
new_data = [
|
| 126 |
model_type,
|
|
|
|
| 142 |
average_accuracy_video,
|
| 143 |
overall_accuracy]
|
| 144 |
# pdb.set_trace()
|
| 145 |
+
csv_data.loc[col] = new_data
|
| 146 |
+
csv_data = csv_data.to_csv(CSV_DIR, index=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 147 |
return 0
|
| 148 |
|
| 149 |
def get_baseline_df():
|
|
|
|
| 220 |
with gr.TabItem("🚀 Submit here! ", elem_id="seed-benchmark-tab-table", id=3):
|
| 221 |
gr.Markdown(LEADERBORAD_INTRODUCTION, elem_classes="markdown-text")
|
| 222 |
|
| 223 |
+
with gr.Row():
|
| 224 |
+
gr.Markdown(SUBMIT_INTRODUCTION, elem_classes="markdown-text")
|
| 225 |
|
| 226 |
with gr.Row():
|
| 227 |
gr.Markdown("# ✉️✨ Submit your model evaluation json file here!", elem_classes="markdown-text")
|
|
|
|
| 253 |
with gr.Column():
|
| 254 |
|
| 255 |
LLM_type = gr.Dropdown(
|
| 256 |
+
choices=["Vicuna-7B", "Flan-T5-XL", "LLaMA-7B", "Other"],
|
| 257 |
label="LLM type",
|
| 258 |
multiselect=False,
|
| 259 |
value="LLaMA-7B",
|
| 260 |
interactive=True,
|
| 261 |
)
|
| 262 |
LLM_name_textbox = gr.Textbox(
|
| 263 |
+
label="LLM model (for Other)",
|
| 264 |
placeholder="LLaMA-13B"
|
| 265 |
)
|
| 266 |
Evaluation_dimension = gr.Dropdown(
|