Spaces:

BreakLee
/

SEED-Bench

Sleeping

App Files Files Community

BreakLee committed on Aug 15, 2023

Commit

75d4504

1 Parent(s): fec9185

Update app.py

Browse files

Files changed (1) hide show

app.py +43 -25

app.py CHANGED Viewed

@@ -68,7 +68,14 @@ def add_new_eval(
     else:
         content = input_file.decode("utf-8")
         prediction = prediction_analyse(content)
-        each_task_accuracy = {i: round(prediction[i]["correct"] / prediction[i]["total"] * 100, 1) for i in range(1, 13)}
         # count for average image\video\all
         total_correct_image = sum(prediction[i]["correct"] for i in range(1, 10))
@@ -77,20 +84,43 @@ def add_new_eval(
         total_image = sum(prediction[i]["total"] for i in range(1, 10))
         total_video = sum(prediction[i]["total"] for i in range(10, 13))
-        average_accuracy_image = round(total_correct_image / total_image * 100, 1)
-        average_accuracy_video = round(total_correct_video / total_video * 100, 1)
-        overall_accuracy = round((total_correct_image + total_correct_video) / (total_image + total_video) * 100, 1)
-        if LLM_type == 'other':
             LLM_name = LLM_name_textbox
         else:
             LLM_name = LLM_type
         if model_link == '':
-            model_name = model_name_textbox  # no url
         else:
-            model_name = '[' + model_name_textbox + '](' + model_link + ')'
         # add new data
         new_data = [
             model_type,
@@ -112,22 +142,8 @@ def add_new_eval(
             average_accuracy_video,
             overall_accuracy]
         # pdb.set_trace()
-        csv_data = pd.read_csv(CSV_DIR)
-        # pdb.set_trace()
-        if revision_name_textbox == '':
-            col = csv_data.shape[0]
-            csv_data.loc[col] = new_data
-            csv_data = csv_data.to_csv(CSV_DIR, index=False)
-        else:
-            model_name_list = csv_data['Model']
-            name_list = [name.split(']')[0][1:] for name in model_name_list]
-            if revision_name_textbox not in name_list:
-                col = csv_data.shape[0]
-            else:
-                col = name_list.index(revision_name_textbox)
-            csv_data.loc[col] = new_data
-            csv_data = csv_data.to_csv(CSV_DIR, index=False)
     return 0
 def get_baseline_df():
@@ -204,6 +220,8 @@ with block:
         with gr.TabItem("🚀 Submit here! ", elem_id="seed-benchmark-tab-table", id=3):
             gr.Markdown(LEADERBORAD_INTRODUCTION, elem_classes="markdown-text")
             with gr.Row():
                 gr.Markdown("# ✉️✨ Submit your model evaluation json file here!", elem_classes="markdown-text")
@@ -235,14 +253,14 @@ with block:
                 with gr.Column():
                     LLM_type = gr.Dropdown(
-                        choices=["Vicuna-7B", "Flan-T5-XL", "LLaMA-7B", "other"],
                         label="LLM type",
                         multiselect=False,
                         value="LLaMA-7B",
                         interactive=True,
                     )
                     LLM_name_textbox = gr.Textbox(
-                        label="LLM model (for other)",
                         placeholder="LLaMA-13B"
                     )
                     Evaluation_dimension = gr.Dropdown(

     else:
         content = input_file.decode("utf-8")
         prediction = prediction_analyse(content)
+        csv_data = pd.read_csv(CSV_DIR)
+        Start_dimension, End_dimension = 1, 13
+        if Evaluation_dimension == 'Image':
+            End_dimension = 10
+        elif Evaluation_dimension == 'Video':
+            Start_dimension = 10
+        each_task_accuracy = {i: round(prediction[i]["correct"] / prediction[i]["total"] * 100, 1) if i >= Start_dimension and i < End_dimension else 0 for i in range(1, 13)}
         # count for average image\video\all
         total_correct_image = sum(prediction[i]["correct"] for i in range(1, 10))
         total_image = sum(prediction[i]["total"] for i in range(1, 10))
         total_video = sum(prediction[i]["total"] for i in range(10, 13))
+        if Evaluation_dimension != 'Video':
+            average_accuracy_image = round(total_correct_image / total_image * 100, 1)
+        else:
+            average_accuracy_image = 0
+        if Evaluation_dimension != 'Image':
+            average_accuracy_video = round(total_correct_video / total_video * 100, 1)
+        else:
+            average_accuracy_video = 0
+        if Evaluation_dimension == 'All':
+            overall_accuracy = round((total_correct_image + total_correct_video) / (total_image + total_video) * 100, 1)
+        else:
+            overall_accuracy = 0
+        if LLM_type == 'Other':
             LLM_name = LLM_name_textbox
         else:
             LLM_name = LLM_type
+        if revision_name_textbox == '':
+            col = csv_data.shape[0]
+            model_name = model_name_textbox
+        else:
+            model_name = revision_name_textbox
+            model_name_list = csv_data['Model']
+            name_list = [name.split(']')[0][1:] for name in model_name_list]
+            if revision_name_textbox not in name_list:
+                col = csv_data.shape[0]
+            else:
+                col = name_list.index(revision_name_textbox)
         if model_link == '':
+            model_name = model_name  # no url
         else:
+            model_name = '[' + model_name + '](' + model_link + ')'
         # add new data
         new_data = [
             model_type,
             average_accuracy_video,
             overall_accuracy]
         # pdb.set_trace()
+        csv_data.loc[col] = new_data
+        csv_data = csv_data.to_csv(CSV_DIR, index=False)
     return 0
 def get_baseline_df():
         with gr.TabItem("🚀 Submit here! ", elem_id="seed-benchmark-tab-table", id=3):
             gr.Markdown(LEADERBORAD_INTRODUCTION, elem_classes="markdown-text")
+            with gr.Row():
+                gr.Markdown(SUBMIT_INTRODUCTION, elem_classes="markdown-text")
             with gr.Row():
                 gr.Markdown("# ✉️✨ Submit your model evaluation json file here!", elem_classes="markdown-text")
                 with gr.Column():
                     LLM_type = gr.Dropdown(
+                        choices=["Vicuna-7B", "Flan-T5-XL", "LLaMA-7B", "Other"],
                         label="LLM type",
                         multiselect=False,
                         value="LLaMA-7B",
                         interactive=True,
                     )
                     LLM_name_textbox = gr.Textbox(
+                        label="LLM model (for Other)",
                         placeholder="LLaMA-13B"
                     )
                     Evaluation_dimension = gr.Dropdown(