# Spaces: Running
# Gradio app that displays the leaderboard.
import gradio as gr
import matplotlib
# matplotlib.use('macosx')
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from apscheduler.schedulers.background import BackgroundScheduler

from texts import INTRODUCTION_TEXT, TITLE
from leaderboards import eng_leaderboards, chi_leaderboards
from opseval_datasets import *
# df_lang = {
#     'English': pd.read_csv("./leaderboard/wired_network_en.csv"),
#     'Chinese': pd.read_csv("./leaderboard/wired_network_zh.csv"),
# }
def create_lang_tabs(lang, lang_cates, data_dir='./data'):
    """Load every leaderboard CSV for one language.

    Args:
        lang: Language code used in the CSV file names (this module calls
            the function with ``'en'`` and ``'zh'``).
        lang_cates: Iterable of ``(dataset, categories)`` pairs; one CSV is
            read per category of each dataset.
        data_dir: Directory that holds the CSV files. Defaults to
            ``'./data'``, the location that used to be hard-coded.

    Returns:
        dict: ``{dataset: {category: DataFrame}}`` where each frame is read
        from ``{data_dir}/{dataset}_{lang}_{category}.csv``.

    Raises:
        FileNotFoundError: If an expected CSV file does not exist (raised by
            ``pandas.read_csv``).
    """
    return {
        dataset: {
            cat: pd.read_csv(f'{data_dir}/{dataset}_{lang}_{cat}.csv')
            for cat in cates
        }
        for dataset, cates in lang_cates
    }
# Leaderboard tables per UI language, loaded once when the module is imported.
# Keys are the tab labels shown in the app; values come from create_lang_tabs.
dict_lang = {
    label: create_lang_tabs(code, boards)
    for label, code, boards in (
        ('English', 'en', eng_leaderboards),
        ('Chinese', 'zh', chi_leaderboards),
    )
}
# Two-level header (shot setting, prompting method) that is applied
# positionally to the eight score columns of a multiple-choice CSV.
_MC_COLUMN_LABELS = (
    ("Zeroshot", "Naive"),
    ("Zeroshot", "SC"),
    ("Zeroshot", "CoT"),
    ("Zeroshot", "CoT+SC"),
    ("Fewshot", "Naive"),
    ("Fewshot", "SC"),
    ("Fewshot", "CoT"),
    ("Fewshot", "CoT+SC"),
)


def process_mc_df(df, shot=None):
    """Turn a raw multiple-choice score table into a ranked leaderboard.

    The input is not modified. The ``name`` column becomes ``Model``; the
    remaining columns are relabelled, in order, with the two-level header in
    ``_MC_COLUMN_LABELS``.

    Args:
        df: Raw table with a ``name`` column followed by eight score columns
            (zero-shot then few-shot; each in the order Naive, SC, CoT,
            CoT+SC).
        shot: Optional top-level label (``"Zeroshot"`` or ``"Fewshot"``).
            When given, only that group's columns are kept and the header
            collapses to a single level.

    Returns:
        DataFrame with a ``Model`` column, the score columns coerced to
        numbers (unparseable cells become NaN) and rounded to two decimals,
        plus a ``BestScore`` column holding each row's maximum. Rows are
        sorted by ``BestScore`` descending; tied rows keep their input order.

    Raises:
        ValueError: If the table does not have exactly eight score columns.
        KeyError: If ``shot`` is not one of the top-level labels.
    """
    scores = df.rename(columns={"name": "Model"}).set_index("Model")

    # Labels are assigned by position, so a wrong column count would
    # otherwise fail with an opaque pandas length-mismatch error.
    if scores.shape[1] != len(_MC_COLUMN_LABELS):
        raise ValueError(
            f"expected {len(_MC_COLUMN_LABELS)} score columns, "
            f"got {scores.shape[1]}"
        )
    scores.columns = pd.MultiIndex.from_tuples(_MC_COLUMN_LABELS)

    # Keep only the requested shot setting, e.g. just the Zeroshot columns.
    if shot:
        scores = scores[shot]

    # Coerce everything to numbers (failures become NaN), two decimals.
    scores = scores.apply(pd.to_numeric, errors="coerce").round(2)

    # Rank by each model's best score; mergesort is stable, so ties keep
    # the order in which the models appear in the input.
    scores["BestScore"] = scores.max(axis=1)
    scores = scores.sort_values(by="BestScore", ascending=False, kind="mergesort")

    return scores.reset_index()
def dataframe_to_gradio(df, is_mc=True, shot=None):
    """Create a Gradio ``Dataframe`` component showing ``df``.

    Args:
        df: Table to display.
        is_mc: When true, ``df`` is first run through ``process_mc_df`` so it
            is shown as a ranked multiple-choice leaderboard; otherwise it is
            shown as-is.
        shot: Forwarded to ``process_mc_df``; ignored when ``is_mc`` is false.

    Returns:
        The ``gr.components.Dataframe`` built from the table's rows, with the
        column labels as headers.
    """
    if is_mc:
        df = process_mc_df(df, shot)
    return gr.components.Dataframe(
        value=df.values.tolist(),
        headers=list(df.columns),
    )
def plot_radar_chart(df, attributes):
    """Build a radar (polar) chart with one filled trace per row of ``df``.

    Args:
        df: Table with a ``Model`` column (used as the trace name) and one
            column for each entry of ``attributes``.
        attributes: List of column labels; they are the angular axes of the
            chart and select the radial values of every row.

    Returns:
        The plotly ``Figure``, titled "OpsEval", with a visible radial axis
        fixed to the range 0 to 0.9 and the legend shown.
    """
    radar = go.Figure()

    for _, record in df.iterrows():
        label = record['Model']
        radii = record[attributes].tolist()
        trace = go.Scatterpolar(
            r=radii,
            theta=attributes,
            fill='toself',
            name=label,
        )
        radar.add_trace(trace)

    radial_axis = dict(visible=True, range=[0, 0.9])
    radar.update_layout(
        title="OpsEval",
        polar=dict(radialaxis=radial_axis),
        showlegend=True,
    )
    return radar
def create_lang_leader_board(lang_dict):
    """Render the leaderboard tabs for one language.

    Must be called while a Gradio layout context is open (``launch_gradio``
    calls it inside a language tab). One tab is created per dataset, labelled
    via ``dataset_abbr_en_dict``. Inside it, the ``'mc'`` category gets a
    zero-shot and a few-shot tab; every other category gets a
    'Question Answering' tab showing its table unchanged.

    Args:
        lang_dict: ``{dataset: {category: DataFrame}}`` as returned by
            ``create_lang_tabs``.
    """
    # TODO: a radar chart of per-dataset best scores (see plot_radar_chart)
    # was sketched here but never enabled; the unused pre-computation that
    # fed it has been dropped.
    for dataset, tables in lang_dict.items():
        with gr.Tab(dataset_abbr_en_dict[dataset]):
            for cat, df in tables.items():
                if cat == 'mc':
                    for shot in ('Zeroshot', 'Fewshot'):
                        with gr.Tab(f'Multiple Choice Question ({shot})'):
                            dataframe_to_gradio(df, is_mc=True, shot=shot)
                else:
                    with gr.Tab('Question Answering'):
                        dataframe_to_gradio(df, is_mc=False)
def launch_gradio():
    """Build the leaderboard UI and launch it.

    Creates a ``gr.Blocks`` app with the title and introduction text, adds
    one tab per language in ``dict_lang`` (filled by
    ``create_lang_leader_board``), then calls ``launch()`` on the app.
    """
    demo = gr.Blocks()
    with demo:
        gr.HTML(TITLE)
        gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
        # Named ``lang_tables`` rather than ``dict`` so the builtin is not
        # shadowed inside this function.
        for lang_label, lang_tables in dict_lang.items():
            with gr.Tab(lang_label):
                create_lang_leader_board(lang_tables)
    demo.launch()
# Show floats with two decimals wherever pandas formats them for display.
pd.set_option('display.float_format', '{:.02f}'.format)

# Re-run launch_gradio every hour in a background thread.
# NOTE(review): dict_lang is built once at import time, so the hourly job
# launches another app from the same already-loaded tables; if the intent is
# to refresh the data, the CSVs would need to be re-read -- confirm.
scheduler = BackgroundScheduler()
scheduler.add_job(launch_gradio, trigger='interval', hours=1)
scheduler.start()

# Initial launch.
launch_gradio()