"""Gradio leaderboard app backed by a CSV file stored in a Hugging Face dataset repo.

The module builds a three-tab UI (leaderboard table, about page, submission
form), and launches it at import time via ``block.launch()``.
"""

# NOTE(review): 'make_clickable_model', 'make_clickable_user' and
# 'get_submissions' are not defined anywhere in the visible file, so
# ``from <module> import *`` would fail on them — confirm whether they live
# elsewhere or should be dropped from this list.
__all__ = ['block', 'make_clickable_model', 'make_clickable_user', 'get_submissions']

import os

import gradio as gr
import pandas as pd
import json
import tempfile

from constants import *
from huggingface_hub import Repository

HF_TOKEN = os.environ.get("HF_TOKEN")


def _pull_submission_repo():
    """Instantiate the submission dataset ``Repository`` and pull its latest commits.

    Returns:
        The ``Repository`` object, so callers that modify the CSV can push.
    """
    submission_repo = Repository(
        local_dir=SUBMISSION_NAME,
        clone_from=SUBMISSION_URL,
        use_auth_token=HF_TOKEN,
        repo_type="dataset",
    )
    submission_repo.git_pull()
    return submission_repo


def _plain_model_name(cell):
    """Return the bare model name stored in a 'Model' cell.

    ``add_new_eval`` writes either a bare name or a markdown link
    ``[name](url)``; only the latter needs its brackets stripped.
    """
    cell = str(cell)
    if cell.startswith('['):
        return cell.split(']')[0][1:]
    return cell


def download_csv():
    """Refresh the local results checkout and expose the CSV for download.

    Returns:
        A tuple ``(CSV_DIR, gr.update(visible=True))``: the file path followed
        by an update that makes the download component visible.
    """
    # pull the results and return this file!
    _pull_submission_repo()
    return CSV_DIR, gr.update(visible=True)


def upload_file(files):
    """Return the ``.name`` attribute of every object in *files*."""
    file_paths = [file.name for file in files]
    return file_paths


def add_new_eval(
    input_file,
    model_name_textbox: str,
    revision_name_textbox: str,
    model_type: str,
    model_link: str,
    model_size: str,
    LLM_type: str,
    LLM_name_textbox: str,
):
    """Add or overwrite a leaderboard row from an uploaded JSON result file.

    Args:
        input_file: JSON payload of the uploaded file (the File widget is
            configured with ``type='binary'``); ``None`` when nothing was
            uploaded.
        model_name_textbox: Name used for a brand-new entry.
        revision_name_textbox: When non-empty, the name of the entry to
            revise; an existing row with that name is overwritten, otherwise
            a new row is appended under that name.
        model_type: Value stored in the first column of the row.
        model_link: Optional URL; when given, the name is stored as a
            markdown link ``[name](url)``.
        model_size: Accepted from the form but not used here.
            NOTE(review): confirm whether it should be written to the CSV.
        LLM_type: Selected LLM; the literal ``'Other'`` defers to
            *LLM_name_textbox*.
        LLM_name_textbox: Free-text LLM name used when *LLM_type* is 'Other'.

    Returns:
        ``"Error! Empty file!"`` when no file was uploaded, otherwise ``0``
        after the CSV has been written and pushed.
    """
    if input_file is None:
        return "Error! Empty file!"

    upload_data = json.loads(input_file)
    submission_repo = _pull_submission_repo()
    csv_data = pd.read_csv(CSV_DIR)

    LLM_name = LLM_name_textbox if LLM_type == 'Other' else LLM_type

    # Appending means writing at the first unused row label.
    col = csv_data.shape[0]
    if revision_name_textbox == '':
        model_name = model_name_textbox
    else:
        model_name = revision_name_textbox
        # Stored names may or may not be markdown links; compare bare names so
        # that entries submitted without a link can be revised too.
        name_list = [_plain_model_name(name) for name in csv_data['Model']]
        if revision_name_textbox in name_list:
            col = name_list.index(revision_name_textbox)

    if model_link != '':
        model_name = '[' + model_name + '](' + model_link + ')'

    # add new data: fixed info columns first, then one score per task
    # (0 for every task missing from the upload).
    new_data = [model_type, model_name, LLM_name]
    new_data.extend(
        upload_data[key] if key in upload_data else 0 for key in TASK_INFO
    )

    csv_data.loc[col] = new_data
    csv_data.to_csv(CSV_DIR, index=False)
    submission_repo.push_to_hub()
    return 0


def get_baseline_df():
    """Return the leaderboard sorted by "Avg" (descending), limited to the
    model-info columns plus the tasks currently held by ``checkbox_group``.

    ``checkbox_group`` is a module-level widget created further down, so this
    function must only be called after the UI has been built.
    """
    _pull_submission_repo()
    df = pd.read_csv(CSV_DIR)
    df = df.sort_values(by="Avg", ascending=False)
    present_columns = MODEL_INFO + checkbox_group.value
    return df[present_columns]


def get_all_df():
    """Return the full leaderboard CSV sorted by "Avg" in descending order."""
    _pull_submission_repo()
    df = pd.read_csv(CSV_DIR)
    return df.sort_values(by="Avg", ascending=False)


def on_filter_model_size_method_change(selected_columns):
    """Rebuild the leaderboard table for the selected task columns.

    Args:
        selected_columns: Task names ticked in the checkbox group; may be
            empty (or ``None``), in which case only the model-info columns
            are shown and the "Avg" ordering is kept.

    Returns:
        A new ``gr.components.Dataframe`` holding the filtered data, sorted
        descending by the first selected task (in ``TASK_INFO`` order).
    """
    updated_data = get_all_df()

    # columns: keep the canonical TASK_INFO ordering rather than click order.
    chosen = selected_columns or []
    selected_columns = [item for item in TASK_INFO if item in chosen]
    present_columns = MODEL_INFO + selected_columns

    updated_data = updated_data[present_columns]
    # With nothing selected there is no task to sort on; indexing [0] would
    # raise IndexError, so keep the ordering produced by get_all_df().
    if selected_columns:
        updated_data = updated_data.sort_values(by=selected_columns[0], ascending=False)

    updated_headers = present_columns
    update_datatype = [DATA_TITILE_TYPE[COLUMN_NAMES.index(x)] for x in updated_headers]

    filter_component = gr.components.Dataframe(
        value=updated_data,
        headers=updated_headers,
        type="pandas",
        datatype=update_datatype,
        interactive=False,
        visible=True,
    )
    return filter_component


block = gr.Blocks()

with block:
    gr.Markdown(
        LEADERBORAD_INTRODUCTION
    )
    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("📊 MVBench", elem_id="mvbench-tab-table", id=1):
            with gr.Row():
                with gr.Accordion("Citation", open=False):
                    citation_button = gr.Textbox(
                        value=CITATION_BUTTON_TEXT,
                        label=CITATION_BUTTON_LABEL,
                        elem_id="citation-button",
                        lines=10,
                    )

            gr.Markdown(
                TABLE_INTRODUCTION
            )

            # selection for column part:
            checkbox_group = gr.CheckboxGroup(
                choices=TASK_INFO,
                value=AVG_INFO,
                label="Evaluation Dimension",
                interactive=True,
            )

            # The callable is passed (not called) so the table is computed
            # by Gradio rather than once at import time.
            data_component = gr.components.Dataframe(
                value=get_baseline_df,
                headers=COLUMN_NAMES,
                type="pandas",
                datatype=DATA_TITILE_TYPE,
                interactive=False,
                visible=True,
            )

            checkbox_group.change(
                fn=on_filter_model_size_method_change,
                inputs=[checkbox_group],
                outputs=data_component,
            )

        # table 2
        with gr.TabItem("📝 About", elem_id="mvbench-tab-table", id=2):
            gr.Markdown(LEADERBORAD_INFO, elem_classes="markdown-text")

        # table 3
        with gr.TabItem("🚀 Submit here! ", elem_id="mvbench-tab-table", id=3):
            gr.Markdown(LEADERBORAD_INTRODUCTION, elem_classes="markdown-text")

            with gr.Row():
                gr.Markdown(SUBMIT_INTRODUCTION, elem_classes="markdown-text")

            with gr.Row():
                gr.Markdown("# ✉️✨ Submit your model evaluation json file here!", elem_classes="markdown-text")

            with gr.Row():
                with gr.Column():
                    model_name_textbox = gr.Textbox(
                        label="Model name", placeholder="LLaMA-7B"
                    )
                    revision_name_textbox = gr.Textbox(
                        label="Revision Model Name", placeholder="LLaMA-7B"
                    )
                    model_type = gr.Dropdown(
                        choices=[
                            "LLM",
                            "ImageLLM",
                            "VideoLLM",
                            "Other",
                        ],
                        label="Model type",
                        multiselect=False,
                        value="ImageLLM",
                        interactive=True,
                    )

                with gr.Column():
                    LLM_type = gr.Dropdown(
                        choices=["Vicuna-7B", "Flan-T5-XL", "LLaMA-7B", "InternLM-7B", "Other"],
                        label="LLM type",
                        multiselect=False,
                        value="LLaMA-7B",
                        interactive=True,
                    )
                    LLM_name_textbox = gr.Textbox(
                        label="LLM model (for Other)",
                        placeholder="LLaMA-13B"
                    )
                    model_link = gr.Textbox(
                        label="Model Link", placeholder="https://huggingface.co/decapoda-research/llama-7b-hf"
                    )
                    model_size = gr.Textbox(
                        label="Model size", placeholder="7B(Input content format must be 'number+B' or '-')"
                    )

            with gr.Column():
                input_file = gr.components.File(label = "Click to Upload a json File", file_count="single", type='binary')
                submit_button = gr.Button("Submit Eval")

                # NOTE(review): submission_result is created but not wired as
                # an output below, so add_new_eval's return value is not
                # displayed — confirm whether that is intended.
                submission_result = gr.Markdown()
                submit_button.click(
                    add_new_eval,
                    inputs = [
                        input_file,
                        model_name_textbox,
                        revision_name_textbox,
                        model_type,
                        model_link,
                        model_size,
                        LLM_type,
                        LLM_name_textbox,
                    ],
                )

    def refresh_data():
        """Return a freshly pulled baseline leaderboard frame."""
        value1 = get_baseline_df()
        return value1

    with gr.Row():
        data_run = gr.Button("Refresh")
    with gr.Row():
        result_download = gr.Button("Download Leaderboard")
        file_download = gr.File(label="download the csv of leaderborad.", visible=False)
        data_run.click(on_filter_model_size_method_change, inputs=[checkbox_group], outputs=data_component)
        # download_csv returns (path, visibility update); both target the same File widget.
        result_download.click(download_csv, inputs=None, outputs= [file_download,file_download])

block.launch()