Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
| from functools import partial | |
| import gradio as gr | |
| from src.constants import SUBTASKS, TASKS | |
| from src.details import update_subtasks_component, update_load_details_component, load_details_dataframes, \ | |
| display_details, update_sample_idx_component, clear_details | |
| from src.results import update_load_results_component, \ | |
| load_results_dataframes, display_results, update_tasks_component, clear_results, \ | |
| sort_result_paths_per_model, fetch_result_paths | |
# NOTE: everything below runs at module level (there is no ``__main__``
# guard), so ``result_paths_per_model`` and ``demo`` exist as soon as this
# module is imported.

# Fetch the available result paths once and organise them per model; the
# model dropdowns further down use the keys of this mapping as their choices.
result_paths_per_model = sort_result_paths_per_model(fetch_result_paths())

# Rebind the imported loader with the path index pre-filled, so the Gradio
# callback only has to be given the two selected model ids.
load_results_dataframes = partial(
    load_results_dataframes,
    result_paths_per_model=result_paths_per_model,
)
# Build the side-by-side comparison UI. Components and event listeners are
# created inside the Blocks context so that they are registered on `demo`.
with gr.Blocks(fill_height=True, fill_width=True) as demo:
    # ------------------------------------------------------------------ header
    gr.HTML("<h1 style='text-align: center;'>Compare Results of the 🤗 Open LLM Leaderboard</h1>")
    gr.HTML("<h3 style='text-align: center;'>Select 2 models to load and compare their results</h3>")
    gr.Markdown(
        "Compare Results of the 🤗 [Open LLM Leaderboard](https://huggingface.co/spaces/open-llm-leaderboard-old/open_llm_leaderboard). "
        "Check out the [documentation](https://huggingface.co/docs/leaderboards/open_llm_leaderboard/about) 📖 to find explanations on the evaluations used, their configuration parameters and details on the input/outputs for the models."
    )

    # Choice lists shared by several components; built once instead of being
    # recomputed for every widget.
    model_choices = list(result_paths_per_model)
    task_choices = list(TASKS.values())
    task_choices_with_all = ["All", *task_choices]

    # --------------------------------------------------------- model selection
    # One column per model: a dropdown plus a hidden dataframe that holds the
    # loaded results for that model.
    with gr.Row():
        with gr.Column():
            model_id_1 = gr.Dropdown(choices=model_choices, label="Models")
            dataframe_1 = gr.Dataframe(visible=False)
        with gr.Column():
            model_id_2 = gr.Dropdown(choices=model_choices, label="Models")
            dataframe_2 = gr.Dataframe(visible=False)

    # -------------------------------------------------------------------- tabs
    with gr.Row():
        with gr.Tab("Results"):
            # Starts disabled; re-evaluated by `update_load_results_component`.
            load_results_btn = gr.Button("Load", interactive=False)
            clear_results_btn = gr.Button("Clear")
            results_task = gr.Radio(
                task_choices_with_all,
                label="Tasks",
                info="Evaluation tasks to be displayed",
                value="All",
                visible=False,
            )
            results = gr.HTML()
        with gr.Tab("Configs"):
            # Mirrors the "Results" tab; both tabs are fed by the same
            # dataframes and kept on the same task selection (see below).
            load_configs_btn = gr.Button("Load", interactive=False)
            clear_configs_btn = gr.Button("Clear")
            configs_task = gr.Radio(
                task_choices_with_all,
                label="Tasks",
                info="Evaluation tasks to be displayed",
                value="All",
                visible=False,
            )
            configs = gr.HTML()
        with gr.Tab("Details"):
            details_task = gr.Radio(
                task_choices,
                label="Tasks",
                info="Evaluation tasks to be loaded",
                interactive=True,
            )
            # `details_task` is created without an initial value, so this
            # lookup presumably yields no choices at build time (TODO confirm);
            # the choices are refreshed by `update_subtasks_component`.
            subtask = gr.Radio(
                SUBTASKS.get(details_task.value),
                label="Subtasks",
                info="Evaluation subtasks to be loaded (choose one of the Tasks above)",
            )
            load_details_btn = gr.Button("Load Details", interactive=False)
            clear_details_btn = gr.Button("Clear Details")
            sample_idx = gr.Number(
                label="Sample Index",
                info="Index of the sample to be displayed",
                value=0,
                minimum=0,
                visible=False,
            )
            details = gr.HTML()
            # Hidden holders for the per-model details data.
            details_dataframe_1 = gr.Dataframe(visible=False)
            details_dataframe_2 = gr.Dataframe(visible=False)
            # NOTE(review): not referenced by any listener in this file.
            details_dataframe = gr.Dataframe(visible=False)

    # ------------------------------------------------- results / configs events
    # Refresh both "Load" buttons whenever the user changes either dropdown.
    gr.on(
        triggers=[model_id_1.input, model_id_2.input],
        fn=update_load_results_component,
        outputs=[load_results_btn, load_configs_btn],
    )
    # Either "Load" button fills both hidden dataframes, then the task radios
    # are updated.
    gr.on(
        triggers=[load_results_btn.click, load_configs_btn.click],
        fn=load_results_dataframes,
        inputs=[model_id_1, model_id_2],
        outputs=[dataframe_1, dataframe_2],
    ).then(
        fn=update_tasks_component,
        outputs=[results_task, configs_task],
    )
    # Synchronize the results_task and configs_task radio buttons
    results_task.input(fn=lambda task: task, inputs=results_task, outputs=configs_task)
    configs_task.input(fn=lambda task: task, inputs=configs_task, outputs=results_task)
    # Re-render both HTML panes when the data or the selected task changes.
    gr.on(
        triggers=[dataframe_1.change, dataframe_2.change, results_task.change],
        fn=display_results,
        inputs=[results_task, dataframe_1, dataframe_2],
        outputs=[results, configs],
    )
    # Either "Clear" button resets the selection, data, buttons and radios.
    gr.on(
        triggers=[clear_results_btn.click, clear_configs_btn.click],
        fn=clear_results,
        outputs=[
            model_id_1,
            model_id_2,
            dataframe_1,
            dataframe_2,
            load_results_btn,
            load_configs_btn,
            results_task,
            configs_task,
        ],
    )

    # ---------------------------------------------------------- details events
    # Picking a task refreshes the subtask radio.
    details_task.change(
        fn=update_subtasks_component,
        inputs=details_task,
        outputs=subtask,
    )
    # Refresh the "Load Details" button on any user change to the models,
    # the task or the subtask.
    gr.on(
        triggers=[model_id_1.input, model_id_2.input, subtask.input, details_task.input],
        fn=update_load_details_component,
        inputs=[model_id_1, model_id_2, subtask],
        outputs=load_details_btn,
    )
    # Load the details for both models, then update the sample index widget
    # from the loaded data.
    load_details_btn.click(
        fn=load_details_dataframes,
        inputs=[subtask, model_id_1, model_id_2],
        outputs=[details_dataframe_1, details_dataframe_2],
    ).then(
        fn=update_sample_idx_component,
        inputs=[details_dataframe_1, details_dataframe_2],
        outputs=sample_idx,
    )
    # Re-render the details pane when the data or the sample index changes.
    gr.on(
        triggers=[details_dataframe_1.change, details_dataframe_2.change, sample_idx.change],
        fn=display_details,
        inputs=[sample_idx, details_dataframe_1, details_dataframe_2],
        outputs=details,
    )
    clear_details_btn.click(
        fn=clear_details,
        outputs=[
            model_id_1,
            model_id_2,
            details_dataframe_1,
            details_dataframe_2,
            details_task,
            subtask,
            load_details_btn,
            sample_idx,
        ],
    )

# Start the app when the module is executed.
demo.launch()