Update app.py
app.py CHANGED
@@ -1,204 +1,124 @@
 import gradio as gr
-from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
 import pandas as pd
-[old lines 4-89 removed: the file's remaining imports; only two truncated multi-line "from … import ( … )" blocks survive in this view]
 
-demo = …
-with demo:
-    gr.HTML(TITLE)
-    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
-
-    with gr.Tabs(elem_classes="tab-buttons") as tabs:
-        with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
-            leaderboard = init_leaderboard(LEADERBOARD_DF)
-
-        with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
-            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
-
-        with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
-            with gr.Column():
-                with gr.Row():
-                    gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
-
-                with gr.Column():
-                    with gr.Accordion(
-                        f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",
-                        open=False,
-                    ):
-                        with gr.Row():
-                            finished_eval_table = gr.components.Dataframe(
-                                value=finished_eval_queue_df,
-                                headers=EVAL_COLS,
-                                datatype=EVAL_TYPES,
-                                row_count=5,
-                            )
-                    with gr.Accordion(
-                        f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})",
-                        open=False,
-                    ):
-                        with gr.Row():
-                            running_eval_table = gr.components.Dataframe(
-                                value=running_eval_queue_df,
-                                headers=EVAL_COLS,
-                                datatype=EVAL_TYPES,
-                                row_count=5,
-                            )
-
-                    with gr.Accordion(
-                        f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
-                        open=False,
-                    ):
-                        with gr.Row():
-                            pending_eval_table = gr.components.Dataframe(
-                                value=pending_eval_queue_df,
-                                headers=EVAL_COLS,
-                                datatype=EVAL_TYPES,
-                                row_count=5,
-                            )
-            with gr.Row():
-                gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")
-
-            with gr.Row():
-                with gr.Column():
-                    model_name_textbox = gr.Textbox(label="Model name")
-                    revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
-                    model_type = gr.Dropdown(
-                        choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
-                        label="Model type",
-                        multiselect=False,
-                        value=None,
-                        interactive=True,
-                    )
-
-                with gr.Column():
-                    precision = gr.Dropdown(
-                        choices=[i.value.name for i in Precision if i != Precision.Unknown],
-                        label="Precision",
-                        multiselect=False,
-                        value="float16",
-                        interactive=True,
-                    )
-                    weight_type = gr.Dropdown(
-                        choices=[i.value.name for i in WeightType],
-                        label="Weights type",
-                        multiselect=False,
-                        value="Original",
-                        interactive=True,
-                    )
-                    base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
-
-            submit_button = gr.Button("Submit Eval")
-            submission_result = gr.Markdown()
-            submit_button.click(
-                add_new_eval,
-                [
-                    model_name_textbox,
-                    base_model_name_textbox,
-                    revision_name_textbox,
-                    precision,
-                    weight_type,
-                    model_type,
-                ],
-                submission_result,
-            )
-
-    with gr.Row():
-        with gr.Accordion("📙 Citation", open=False):
-            citation_button = gr.Textbox(
-                value=CITATION_BUTTON_TEXT,
-                label=CITATION_BUTTON_LABEL,
-                lines=20,
-                elem_id="citation-button",
-                show_copy_button=True,
-            )
-
-scheduler = BackgroundScheduler()
-scheduler.add_job(restart_space, "interval", seconds=1800)
-scheduler.start()
-demo.queue(default_concurrency_limit=40).launch()
+import os
+import shutil
+
+# Description and introduction texts
+DESCRIPTION = """
+An independent performance benchmark of LLMs across various inference engines. Definitions are below the table.
+"""
+
+INTRODUCTION = """
+**Introduction**
+This benchmark is part of our ongoing quest to help developers find the right libraries and LLMs for their use cases.
+
+We tested a range of popular LLMs across six different inference engines (vLLM, TGI, TensorRT-LLM, Triton with vLLM backend, DeepSpeed-MII, CTranslate2) on A100 GPUs hosted on Azure, ensuring a neutral playing field separate from our Inferless platform.
+The goal?
+To help developers, researchers, and AI enthusiasts pinpoint the best LLMs for their needs, whether for development or production.
+"""
+
+HOW_WE_TESTED = """
+**How we tested**
+Here's how we ensured consistent, reliable benchmarks:
+* **Platform:** All tests ran on A100 GPUs from Azure, providing a level playing field.
+* **Setup:** Docker containers for each library ensured a consistent environment.
+* **Configuration:** Standard settings (temperature 0.5, top_p 1) kept the focus on performance, not external variables.
+* **Prompts & Token Ranges:** We used six distinct prompts with input lengths from 20 to 2,000 tokens and tested generation lengths of 100, 200, and 500 tokens to evaluate each library's flexibility.
+* **Models & Libraries Tested:** We evaluated Phi-3-medium-128k-instruct, Meta-Llama-3.1-8B-Instruct, Mistral-7B-Instruct-v0.3, Qwen2-7B-Instruct, and Gemma-2-9b-it using Text Generation Inference (TGI), vLLM, DeepSpeed-MII, CTranslate2, Triton with vLLM backend, and TensorRT-LLM.
+"""
+
+# Path to the folder containing the benchmark result CSV files
+csv_folder_path = 'result_csv/'
+
+# Read all CSV files from the folder and rearrange columns
+def read_and_process_csv_files(folder_path):
+    all_data = []
+    for filename in os.listdir(folder_path):
+        if filename.endswith('.csv'):
+            file_path = os.path.join(folder_path, filename)
+            df = pd.read_csv(file_path)
+            all_data.append(df)
+
+    combined_df = pd.concat(all_data, ignore_index=True)
+
+    # Rearrange columns
+    columns_order = [
+        "Model_Name", "Library", "TTFT", "Tokens-per-Second", "Token_Count",
+        "Input_Tokens", "Output_Tokens", "Input", "Output"
+    ]
+
+    # Ensure all required columns exist; create any missing ones with NA values
+    for col in columns_order:
+        if col not in combined_df.columns:
+            combined_df[col] = pd.NA
+
+    # Select and order the columns
+    return combined_df[columns_order]
+
+df = read_and_process_csv_files(csv_folder_path)
+
+def get_leaderboard_df():
+    return df
+
+def add_new_entry(file):
+    global df
+    if file is None:
+        return df, "No file uploaded."
+
+    # Read the uploaded CSV file
+    new_df = pd.read_csv(file.name)
+
+    # Rearrange columns to match the existing DataFrame
+    columns_order = [
+        "Model_Name", "Library", "TTFT", "Tokens-per-Second", "Token_Count",
+        "Input_Tokens", "Output_Tokens", "Input", "Output"
+    ]
+    for col in columns_order:
+        if col not in new_df.columns:
+            new_df[col] = pd.NA
+    new_df = new_df[columns_order]
+
+    # Append the new data to the existing DataFrame
+    df = pd.concat([df, new_df], ignore_index=True)
+
+    # Save the uploaded file to the CSV folder
+    filename = os.path.basename(file.name)
+    destination = os.path.join(csv_folder_path, filename)
+    shutil.copy(file.name, destination)
+
+    return df, f"File '{filename}' uploaded and data added successfully!"
+
+with gr.Blocks() as demo:
+    gr.Markdown("# LLM Inference Leaderboard")
+
+    # About section at the top
+    with gr.Column():
+        gr.Markdown("---")
+        gr.Markdown(DESCRIPTION)
+        gr.Markdown(INTRODUCTION)
+        gr.Markdown("---")
+
+    # Tabs for Leaderboard and Add New Entry
+    with gr.Tabs():
+        with gr.TabItem("Leaderboard"):
+            leaderboard = gr.DataFrame(df)
+
+        with gr.TabItem("Add New Entry"):
+            file_upload = gr.File(label="Upload CSV File")
+            submit_button = gr.Button("Add Entry")
+            result = gr.Markdown()
+
+    # How we tested section at the bottom
+    with gr.Column():
+        gr.Markdown("---")
+        gr.Markdown(HOW_WE_TESTED)
+
+    submit_button.click(
+        add_new_entry,
+        inputs=[file_upload],
+        outputs=[leaderboard, result]
+    )
+
+    demo.load(get_leaderboard_df, outputs=[leaderboard])
+
+demo.launch()
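
For anyone preparing a result file: a row only needs the nine columns in columns_order, and any missing column is filled with pd.NA by the loader. Below is a minimal sketch of exercising read_and_process_csv_files, assuming the function above is in scope (for example, copied into a REPL). The file name result_csv/example.csv and every value are illustrative, not real benchmark numbers.

import os
import pandas as pd

os.makedirs("result_csv", exist_ok=True)

# Hypothetical single-row result file; every value is illustrative only.
sample = pd.DataFrame([{
    "Model_Name": "Mistral-7B-Instruct-v0.3",
    "Library": "vLLM",
    "TTFT": 0.21,                # time to first token (unit assumed: seconds)
    "Tokens-per-Second": 95.0,
    "Token_Count": 120,
    "Input_Tokens": 20,
    "Output_Tokens": 100,
    "Input": "example prompt",
    "Output": "example completion",
}])
sample.to_csv("result_csv/example.csv", index=False)

# The loader concatenates every CSV in the folder and normalizes the columns.
df = read_and_process_csv_files("result_csv/")
print(df.columns.tolist())  # matches columns_order exactly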
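
The upload path can also be smoke-tested without launching the UI. The handler only touches file.name, so any object carrying a .name path will do, assuming the Gradio version in use passes such an object (which is what this code expects). A minimal sketch: SimpleNamespace and the file name new_results.csv are stand-ins, and add_new_entry from the app above must be in scope.

from types import SimpleNamespace

# Stand-in for the upload object Gradio hands to the handler.
upload = SimpleNamespace(name="new_results.csv")  # hypothetical local CSV

updated_df, message = add_new_entry(upload)
print(message)           # "File 'new_results.csv' uploaded and data added successfully!"
print(len(updated_df))   # grew by the number of rows in the uploaded CSV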
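
On the benchmarking side, the "How we tested" text pins the decoding settings to temperature 0.5 and top_p 1, with generation lengths of 100, 200, and 500 tokens. Collected as a sketch below; the variable and key names are assumptions for illustration, since each engine (vLLM, TGI, TensorRT-LLM, and so on) spells these options differently.

# Decoding settings held constant across runs (from "How we tested").
GENERATION_CONFIG = {
    "temperature": 0.5,  # stated standard setting
    "top_p": 1.0,        # stated standard setting
}

# Six prompts spanning 20 to 2,000 input tokens, each run at three
# generation lengths; names here are illustrative.
OUTPUT_LENGTHS = [100, 200, 500]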