Spaces:
Running
Running
| """A gradio app that renders a static leaderboard. This is used for Hugging Face Space.""" | |
| import ast | |
| import argparse | |
| import glob | |
| import pickle | |
| import gradio as gr | |
| import numpy as np | |
| import pandas as pd | |
| import gradio as gr | |
| import pandas as pd | |
| from pathlib import Path | |
| import json | |
| from constants import BANNER, INTRODUCTION_TEXT, CITATION_TEXT, METRICS_TAB_TEXT, DIR_OUTPUT_REQUESTS | |
| from init import is_model_on_hub, upload_file, load_all_info_from_dataset_hub | |
| from utils_display import AutoEvalColumn, fields, make_clickable_model, styled_error, styled_message | |
| from datetime import datetime, timezone | |
# Hard-coded date shown in the "Last updated on" line of the page.
LAST_UPDATED = "Feb 27th 2024"

# Custom stylesheet passed to gr.Blocks: enlarges markdown text and centers
# the header and body cells of the table.
css = """
.markdown-text{font-size: 16pt}
th {
  text-align: center;
}
td {
  font-size: 16px; /* Adjust the font size as needed */
  text-align: center;
}
"""
# Maps column names of the results table to the headers displayed in the UI
# (applied with DataFrame.rename before the table is rendered).
column_names = {
    "model": "Model",
    # NOTE(review): the suffix below looks like a mis-encoded emoji (UTF-8
    # bytes shown through a single-byte Greek codepage). The original glyph
    # cannot be recovered unambiguously from what is left -- confirm and restore.
    "Overall": "All π―",
    # Keycap-digit emoji, written as escapes (digit + U+FE0F + U+20E3) so the
    # labels survive any re-encoding of this file.
    "Turn 1": "Turn 1\ufe0f\u20e3",
    "Turn 2": "Turn 2\ufe0f\u20e3",
}
# Per-model display metadata, keyed by the model name as it appears in the
# "model" column of the results table. The whole mapping is handed to
# make_clickable_model together with each model name.
#   hf_name     -- hub repo id, or a full URL for API-only models
#   pretty_name -- human-readable label
model_info = {
    "gpt-4": {"hf_name": "https://platform.openai.com/", "pretty_name": "gpt-4"},
    "gpt-3.5-turbo": {"hf_name": "https://platform.openai.com/", "pretty_name": "gpt-3.5-turbo"},
    "Llama-2-70b-hf": {"hf_name": "meta-llama/Llama-2-70b-hf", "pretty_name": "Llama-2-70B"},
    "Llama-2-13b-hf": {"hf_name": "meta-llama/Llama-2-13b-hf", "pretty_name": "Llama-2-13B"},
    "Llama-2-7b-hf": {"hf_name": "meta-llama/Llama-2-7b-hf", "pretty_name": "Llama-2-7B"},
    "Mixtral-8x7B-v0.1": {"hf_name": "mistralai/Mixtral-8x7B-v0.1", "pretty_name": "Mixtral-8x7B"},
    "Mistral-7b-v0.1": {"hf_name": "mistralai/Mistral-7B-v0.1", "pretty_name": "Mistral-7B"},
    "Yi-34B": {"hf_name": "01-ai/Yi-34B", "pretty_name": "Yi-34B"},
    "Yi-6B": {"hf_name": "01-ai/Yi-6B", "pretty_name": "Yi-6B"},
    "gemma-7b": {"hf_name": "google/gemma-7b", "pretty_name": "Gemma-7B"},
    "gemma-2b": {"hf_name": "google/gemma-2b", "pretty_name": "Gemma-2B"},
    "phi-2": {"hf_name": "microsoft/phi-2", "pretty_name": "Phi-2 (2.7B)"},
    "olmo": {"hf_name": "allenai/OLMo-7B", "pretty_name": "OLMo-7B"},
}
def formatter(x):
    """Format a single table cell for display.

    Strings are returned unchanged; every other value is rounded to two
    decimal places. Values that ``round`` cannot handle (for example ``None``
    in a missing cell) are returned as-is instead of raising.

    Args:
        x: The raw cell value.

    Returns:
        The value to display in the table.
    """
    if isinstance(x, str):
        return x
    try:
        return round(x, 2)
    except TypeError:
        # Not a roundable value (e.g. None); show it untouched rather than
        # aborting the whole leaderboard build.
        return x
def build_demo(original_df, TYPES):
    """Assemble the Gradio Blocks UI for the static leaderboard.

    Args:
        original_df: DataFrame shown as the (read-only) leaderboard table.
        TYPES: Column datatypes forwarded to the Dataframe component's
            ``datatype`` argument.

    Returns:
        The constructed ``gr.Blocks`` app; the caller is responsible for
        launching it.
    """
    # NOTE(review): the tab / accordion labels below start with what looks like
    # a mis-encoded emoji; the strings are kept exactly as found -- confirm the
    # intended glyphs and restore them.
    # NOTE(review): the widget nesting was reconstructed from a source whose
    # indentation was lost; confirm the "Last updated" line and citation row
    # belong below the tabs rather than inside the Metrics tab.
    with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
        # gr.HTML(BANNER, elem_id="banner")
        gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

        with gr.Tabs(elem_classes="tab-buttons"):
            with gr.TabItem("π Leaderboard", elem_id="od-benchmark-tab-table", id=0):
                gr.components.Dataframe(
                    value=original_df,
                    datatype=TYPES,
                    height=1000,
                    wrap=False,
                    elem_id="leaderboard-table",
                    interactive=False,
                    visible=True,
                    min_width=60,
                )

            # NOTE(review): this tab reuses the elem_id of the leaderboard tab,
            # which yields duplicate DOM ids; left unchanged in case external
            # CSS/JS targets it.
            with gr.TabItem("π Metrics", elem_id="od-benchmark-tab-table", id=1):
                gr.Markdown(METRICS_TAB_TEXT, elem_classes="markdown-text")

        gr.Markdown(f"Last updated on **{LAST_UPDATED}**", elem_classes="markdown-text")

        with gr.Row():
            with gr.Accordion("π Citation", open=False):
                gr.Textbox(
                    value=CITATION_TEXT,
                    lines=7,
                    label="Copy the BibTeX to cite URIAL and MT-Bench",
                    elem_id="citation-button",
                    show_copy_button=True,
                )

    return demo
# Script entry point: load the results table, format it, and serve the UI.
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--share", action="store_true")
    parser.add_argument("--result_file", help="Path to results table", default="leaderboard_data.jsonl")
    args = parser.parse_args()

    # The results file is JSON Lines: one record per model.
    original_df = pd.read_json(args.result_file, lines=True)
    print(original_df.columns)

    for col in original_df.columns:
        if col == "model":
            # Replace the raw model name with whatever make_clickable_model
            # returns for it (rendered as markdown in the table).
            original_df[col] = original_df[col].apply(
                lambda name: make_clickable_model(name, model_info)
            )
        else:
            original_df[col] = original_df[col].apply(formatter)  # For numerical values

    # Keep the first column first, put 'Overall' second, then the remaining
    # columns in their existing order.
    first_col = original_df.columns[0]
    new_order = [first_col, "Overall"] + [
        col for col in original_df.columns if col not in (first_col, "Overall")
    ]

    # Chain the non-mutating forms: sorting/renaming in place on a
    # column-selected frame can trigger pandas' SettingWithCopy warning.
    reordered_df = (
        original_df[new_order]
        .sort_values(by="Overall", ascending=False)
        .rename(columns=column_names)
    )

    # One datatype per displayed column: markdown for the leading model-link
    # column, then "number" or "str" depending on each column's dtype.
    TYPES = ["markdown"] + [
        "number" if pd.api.types.is_numeric_dtype(dtype) else "str"
        for dtype in reordered_df.dtypes.iloc[1:]
    ]

    demo = build_demo(reordered_df, TYPES)
    demo.launch(share=args.share)