Update app.py
app.py CHANGED
@@ -99,7 +99,6 @@ def get_leaderboard_df():
 
 
 def get_evaluation_queue_df():
-    # todo @saylortwift: replace the repo by the one you created for the eval queue
     if eval_queue:
         print("Pulling changes for the evaluation queue.")
         eval_queue.git_pull()
@@ -141,7 +140,7 @@ def get_evaluation_queue_df():
         data["model"] = make_clickable_model(data["model"])
         all_evals.append(data)
 
-    pending_list = [e for e in all_evals if e["status"] == "PENDING"]
+    pending_list = [e for e in all_evals if e["status"] in ["PENDING", "RERUN"]]
     running_list = [e for e in all_evals if e["status"] == "RUNNING"]
     finished_list = [e for e in all_evals if e["status"].startswith("FINISHED")]
     df_pending = pd.DataFrame.from_records(pending_list, columns=EVAL_COLS)
@@ -179,6 +178,7 @@ def add_new_eval(
     precision: str,
     private: bool,
     weight_type: str,
+    model_type: str,
 ):
     precision = precision.split(" ")[0]
     current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
@@ -209,6 +209,7 @@ def add_new_eval(
         "weight_type": weight_type,
         "status": "PENDING",
         "submitted_time": current_time,
+        "model_type": model_type,
     }
 
     user_name = ""
@@ -296,7 +297,7 @@ with demo:
             )
 
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
-        with gr.TabItem("🏅 LLM Benchmark
+        with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
             leaderboard_table_lite = gr.components.Dataframe(
                 value=leaderboard_df[COLS_LITE],
                 headers=COLS_LITE,
@@ -318,7 +319,7 @@ with demo:
                 leaderboard_table_lite,
             )
 
-        with gr.TabItem("
+        with gr.TabItem("📊 Extended model view", elem_id="llm-benchmark-tab-table", id=1):
             leaderboard_table = gr.components.Dataframe(
                 value=leaderboard_df,
                 headers=COLS,
@@ -340,16 +341,16 @@ with demo:
             [hidden_leaderboard_table_for_search, search_bar],
             leaderboard_table,
         )
-        with gr.TabItem("About", elem_id="llm-benchmark-tab-table", id=2):
+        with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
             gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
 
-        with gr.TabItem("
+        with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
             with gr.Column():
                 with gr.Row():
                     gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
 
                 with gr.Column():
-                    with gr.Accordion(f"✅ Finished Evaluations
+                    with gr.Accordion(f"✅ Finished Evaluations ({len(finished_eval_queue_df)})", open=False):
                         with gr.Row():
                             finished_eval_table = gr.components.Dataframe(
                                 value=finished_eval_queue_df,
@@ -357,7 +358,7 @@ with demo:
                                 datatype=EVAL_TYPES,
                                 max_rows=5,
                             )
-                    with gr.Accordion(f"🔄 Running Evaluation Queue
+                    with gr.Accordion(f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})", open=False):
                         with gr.Row():
                             running_eval_table = gr.components.Dataframe(
                                 value=running_eval_queue_df,
@@ -366,7 +367,7 @@ with demo:
                                 max_rows=5,
                             )
 
-                    with gr.Accordion(f"⏳ Pending Evaluation Queue
+                    with gr.Accordion(f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})", open=False):
                         with gr.Row():
                             pending_eval_table = gr.components.Dataframe(
                                 value=pending_eval_queue_df,
@@ -374,6 +375,63 @@ with demo:
                                 datatype=EVAL_TYPES,
                                 max_rows=5,
                             )
+            with gr.Row():
+                gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")
+
+            with gr.Row():
+                with gr.Column():
+                    model_name_textbox = gr.Textbox(label="Model name")
+                    revision_name_textbox = gr.Textbox(
+                        label="revision", placeholder="main"
+                    )
+                    private = gr.Checkbox(
+                        False, label="Private", visible=not IS_PUBLIC
+                    )
+                    model_type = gr.Dropdown(
+                        choices=["pretrained", "fine-tuned", "with RL"],
+                        label="Model type",
+                        multiselect=False,
+                        value="pretrained",
+                        max_choices=1,
+                        interactive=True,
+                    )
+
+                with gr.Column():
+                    precision = gr.Dropdown(
+                        choices=["float16", "bfloat16", "8bit (LLM.int8)", "4bit (QLoRA / FP4)"],
+                        label="Precision",
+                        multiselect=False,
+                        value="float16",
+                        max_choices=1,
+                        interactive=True,
+                    )
+                    weight_type = gr.Dropdown(
+                        choices=["Original", "Delta", "Adapter"],
+                        label="Weights type",
+                        multiselect=False,
+                        value="Original",
+                        max_choices=1,
+                        interactive=True,
+                    )
+                    base_model_name_textbox = gr.Textbox(
+                        label="Base model (for delta or adapter weights)"
+                    )
+
+            submit_button = gr.Button("Submit Eval")
+            submission_result = gr.Markdown()
+            submit_button.click(
+                add_new_eval,
+                [
+                    model_name_textbox,
+                    base_model_name_textbox,
+                    revision_name_textbox,
+                    precision,
+                    private,
+                    weight_type,
+                    model_type
+                ],
+                submission_result,
+            )
 
     with gr.Row():
         refresh_button = gr.Button("Refresh")
@@ -407,4 +465,4 @@
 scheduler = BackgroundScheduler()
 scheduler.add_job(restart_space, "interval", seconds=3600)
 scheduler.start()
-demo.queue(concurrency_count=40).launch()
+demo.queue(concurrency_count=40).launch()
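Aside from the new submission form and the model_type plumbing, the queue-splitting change (new line 143) is the one behavioral fix: an eval request whose status is "RERUN" now lands in the pending table instead of matching none of the three lists. A self-contained illustration of the filters from the diff, run over made-up sample records:

# Made-up sample records run through the exact filters from the diff.
all_evals = [
    {"model": "a", "status": "PENDING"},
    {"model": "b", "status": "RERUN"},        # previously fell through; now counted as pending
    {"model": "c", "status": "RUNNING"},
    {"model": "d", "status": "FINISHED_OK"},  # any FINISHED* status counts as finished
]

pending_list = [e for e in all_evals if e["status"] in ["PENDING", "RERUN"]]
running_list = [e for e in all_evals if e["status"] == "RUNNING"]
finished_list = [e for e in all_evals if e["status"].startswith("FINISHED")]

print(len(pending_list), len(running_list), len(finished_list))  # -> 2 1 1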
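The rest of the diff threads the new model_type value from the form's dropdown, through add_new_eval's signature, into the stored eval request. A minimal sketch of the resulting record shape: the helper name build_eval_entry and every key other than "weight_type", "status", "submitted_time", and "model_type" are illustrative assumptions, not taken from this diff.

from datetime import datetime, timezone

# Hypothetical helper mirroring how add_new_eval assembles an eval request
# after this commit. Only "model_type" is the newly added key; the
# "model"/"revision"/"precision"/"private" keys are assumed for illustration.
def build_eval_entry(model, revision, precision, private, weight_type, model_type):
    return {
        "model": model,
        "revision": revision or "main",
        "precision": precision.split(" ")[0],  # "8bit (LLM.int8)" -> "8bit", as in the diff
        "private": private,
        "weight_type": weight_type,
        "status": "PENDING",
        "submitted_time": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
        "model_type": model_type,  # new field: "pretrained", "fine-tuned", or "with RL"
    }

entry = build_eval_entry("org/model", "main", "float16", False, "Original", "fine-tuned")
print(entry["model_type"])  # -> fine-tuned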