Spaces:
Running
Running
| # gradio display leaderboard | |
| import pandas as pd | |
| import numpy as np | |
| import matplotlib | |
| # matplotlib.use('macosx') | |
| import gradio as gr | |
| import matplotlib.pyplot as plt | |
| import plotly.graph_objects as go | |
| from apscheduler.schedulers.background import BackgroundScheduler | |
| from texts import * | |
| from leaderboards import eng_leaderboards, chi_leaderboards, dataset_abbr_en_dict, dataset_abbr_zh_dict | |
| import toml | |
| import os | |
| from latex_utils import gen_latex_table | |
# Runtime settings parsed from config.toml; get_latest_modification_date reads
# config['dataset']['dataset_dir'] from this mapping.
config = toml.load("config.toml")
def create_lang_tabs(lang, lang_cates):
    """Load every leaderboard CSV for one language.

    Args:
        lang: Language code embedded in the CSV file names (this file passes
            'en' and 'zh').
        lang_cates: Iterable of ``(dataset, categories)`` pairs, where
            ``categories`` is an iterable of category names.

    Returns:
        Nested dict ``{dataset: {category: DataFrame}}``; each frame is read
        from ``./data_v2/{dataset}_{lang}_{category}_gen.csv``.
    """
    return {
        dataset_name: {
            category: pd.read_csv(f'./data_v2/{dataset_name}_{lang}_{category}_gen.csv')
            for category in categories
        }
        for dataset_name, categories in lang_cates
    }
# Leaderboard frames per language tab, loaded once when the module is imported.
# Keys are the tab captions; values are the nested dicts from create_lang_tabs.
dict_lang = dict(
    English=create_lang_tabs('en', eng_leaderboards),
    Chinese=create_lang_tabs('zh', chi_leaderboards),
)
def process_mc_df(df, shot=None):
    """Turn a raw multiple-choice result table into a display-ready leaderboard.

    Args:
        df: Frame with a ``name`` column plus any of the score columns
            ``zero_naive``, ``zero_self_con``, ``zero_cot``,
            ``zero_cot_self_con``, ``few_naive``, ``few_self_con``,
            ``few_cot``, ``few_cot_self_con``.
        shot: ``"Zeroshot"`` or ``"Fewshot"`` to keep only that group of
            columns. When falsy, both groups are kept under a two-level
            column index.

    Returns:
        A new frame with a ``Model`` column, the numeric score columns rounded
        to two decimals, and a ``BestScore`` column (row-wise maximum). Rows
        are sorted by ``BestScore`` in descending order and missing values are
        replaced with ``'/'``.

    Raises:
        KeyError: If a column other than ``name`` is not one of the known
            score columns, or ``shot`` names a group that is not present.
    """
    # Rename "name" to "Model" and use it as the row index so that only score
    # columns remain as columns.
    df = df.rename(columns={"name": "Model"}).set_index("Model")

    # Map each flat score column to (shot group, prompting method).
    col_to_multi_index = {
        "zero_naive": ("Zeroshot", "Naive"),
        "zero_self_con": ("Zeroshot", "SC"),
        "zero_cot": ("Zeroshot", "CoT"),
        "zero_cot_self_con": ("Zeroshot", "CoT+SC"),
        "few_naive": ("Fewshot", "Naive"),
        "few_self_con": ("Fewshot", "SC"),
        "few_cot": ("Fewshot", "CoT"),
        "few_cot_self_con": ("Fewshot", "CoT+SC"),
    }
    columns = [col_to_multi_index[col] for col in df.columns]
    try:
        df.columns = pd.MultiIndex.from_tuples(columns)
    except Exception:
        # Dump the offending frame for debugging, then propagate unchanged.
        print(df)
        raise

    # Keep only the requested shot group, e.g. shot="Zeroshot" keeps the
    # Zeroshot columns and drops the outer column level.
    if shot:
        df = df[shot]

    # Coerce every score to a number (unparseable cells become NaN) and keep
    # two decimal places.
    df = df.apply(pd.to_numeric, errors="coerce")
    df = df.round(2)

    # Rank models by their best score across the remaining columns.
    df["BestScore"] = df.max(axis=1)
    df = df.sort_values(by="BestScore", ascending=False)

    # Bring "Model" back as a regular column and show '/' for missing cells.
    df = df.reset_index()
    df = df.fillna('/')
    return df
def process_qa_df(df):
    """Return the question-answering table rounded to four decimal places."""
    return df.round(4)
def dataframe_to_gradio(df, is_mc=True, shot=None):
    """Build a Gradio Dataframe component from a raw leaderboard frame.

    Args:
        df: Raw leaderboard frame.
        is_mc: When true the frame is prepared with ``process_mc_df``,
            otherwise with ``process_qa_df``.
        shot: Forwarded to ``process_mc_df``; ignored for non-MC tables.

    Returns:
        A ``gr.components.Dataframe`` whose rows are the processed values and
        whose headers are the processed column labels.
    """
    table = process_mc_df(df, shot) if is_mc else process_qa_df(df)
    return gr.components.Dataframe(
        value=table.values.tolist(),
        headers=list(table.columns),
    )
def plot_radar_chart(df, attributes):
    """Draw one filled radar trace per row of ``df``.

    Args:
        df: Frame with a ``Model`` column and one column per attribute.
        attributes: Column labels used as the angular axis of the chart.

    Returns:
        A plotly ``Figure`` titled "OpsEval" with the legend shown and the
        radial axis fixed to the range 0 to 0.9.
    """
    figure = go.Figure()
    for _, record in df.iterrows():
        model_name = record['Model']
        scores = record[attributes].tolist()
        trace = go.Scatterpolar(
            r=scores,
            theta=attributes,
            fill='toself',
            name=model_name,
        )
        figure.add_trace(trace)

    radial_axis = dict(visible=True, range=[0, 0.9])
    figure.update_layout(
        title="OpsEval",
        polar=dict(radialaxis=radial_axis),
        showlegend=True,
    )
    return figure
def pop_latex_table(caption, label, lang, dataset, cat):
    """Render the cached frame for ``lang``/``dataset``/``cat`` as LaTeX.

    Args:
        caption: Caption passed to ``gen_latex_table``.
        label: Label passed to ``gen_latex_table``.
        lang, dataset, cat: Keys into ``global_df`` selecting the frame.

    Returns:
        A visible ``gr.Textbox`` labelled "LaTeX Table" holding the generated
        table source.
    """
    source_df = global_df[lang][dataset][cat]
    latex_source = gen_latex_table(caption, label, source_df)
    return gr.Textbox(latex_source, label="LaTeX Table", visible=True)
def generate_csv(lang, dataset, cat, shot, chosen_dataset):
    """Write the cached frame for ``lang``/``dataset``/``cat`` to a CSV file.

    The file is created in the current working directory and named
    ``{chosen_dataset}_{lang}_{cat}_{shot}.csv``; empty parts are left out.

    Args:
        lang, dataset, cat: Keys into ``global_df`` selecting the frame.
        shot: Shot label used only in the file name.
        chosen_dataset: Display name of the dataset, used only in the file
            name.

    Returns:
        A visible ``gr.File`` labelled "Download Link" pointing at the file.

    Raises:
        KeyError: If ``lang``/``dataset``/``cat`` is not present in
            ``global_df``.
    """
    df = global_df[lang][dataset][cat]

    name_parts = [str(part) for part in (chosen_dataset, lang, cat, shot) if part]
    file_name = "_".join(name_parts) + ".csv"
    # The name parts come from text inputs, so neutralise path separators to
    # keep the export inside the working directory and avoid writing into a
    # directory that does not exist.
    file_name = file_name.replace("/", "_").replace("\\", "_")

    df.to_csv(file_name, index=False)
    return gr.File(label="Download Link", type="filepath", value=file_name, visible=True)
# Cache of raw leaderboard frames, indexed as global_df[lang][dataset][cat].
# Filled by create_lang_leader_board; read by pop_latex_table and generate_csv.
global_df = {}
def _add_export_controls(caption, label, lang, dataset, cat, shot, dataset_name):
    """Create the hidden export parameters and the two export buttons for one table.

    Must be called inside the Gradio layout context in which the controls
    should be rendered.

    Returns:
        ``(latex_button, latex_textbox, csv_button, download_link, params)``
        where ``params`` maps "caption", "label", "lang", "dataset", "cat",
        "shot" and "dataset_name" to hidden ``gr.Text`` components.
    """
    # Output slots stay hidden until an export handler returns a visible one.
    latex_textbox = gr.Textbox(label="LaTeX Table", visible=False)
    download_link = gr.File(label="Download Link", type="filepath", visible=False)
    # Hidden text fields carry the export arguments to the click handlers.
    params = {
        "caption": gr.Text(value=caption, visible=False),
        "label": gr.Text(value=label, visible=False),
        "lang": gr.Text(value=lang, visible=False),
        "dataset": gr.Text(value=dataset, visible=False),
        "cat": gr.Text(value=cat, visible=False),
        "shot": gr.Text(value=shot, visible=False),
        "dataset_name": gr.Text(value=dataset_name, visible=False),
    }
    with gr.Row():
        latex_button = gr.Button("Export LaTeX Table", variant="primary")
        csv_button = gr.Button("Export CSV", variant="primary")
    return latex_button, latex_textbox, csv_button, download_link, params


def create_lang_leader_board(lang_dict, lang, dis_lang='en'):
    """Build the dataset tabs, tables and export controls for one language.

    Each dataset gets a tab; a 'mc' category yields one sub-tab per shot
    setting (Zeroshot, Fewshot) and any other category yields a
    'Question Answering' sub-tab. A deep copy of every raw frame is stored in
    ``global_df[lang][dataset][cat]`` the first time it is seen.

    Args:
        lang_dict: ``{dataset: {category: DataFrame}}`` for this language.
        lang: Key under which the frames are cached in ``global_df``; also
            embedded in the LaTeX captions and labels.
        dis_lang: Display language; ``"en"`` selects ``dataset_abbr_en_dict``
            for tab captions, anything else ``dataset_abbr_zh_dict``.

    Returns:
        ``(tab_list, latex_btn_list, latex_textbox_list, csv_btn_list,
        csv_textbox_list, export_params)``. The last five lists are parallel,
        with one entry per rendered table.
    """
    tab_list = []
    latex_btn_list = []
    latex_textbox_list = []
    csv_btn_list = []
    csv_textbox_list = []
    export_params = []

    chosen_dict = dataset_abbr_en_dict if dis_lang == "en" else dataset_abbr_zh_dict
    lang_cache = global_df.setdefault(lang, {})

    for dataset, value in lang_dict.items():
        dataset_cache = lang_cache.setdefault(dataset, {})
        display_name = chosen_dict[dataset]
        with gr.Tab(display_name) as tab:
            for cat, df in value.items():
                if cat not in dataset_cache:
                    dataset_cache[cat] = df.copy(deep=True)

                is_mc = cat == 'mc'
                if is_mc:
                    sections = [
                        (
                            f'Multiple Choice Question ({shot})',
                            shot,
                            f"{display_name} Multiple Choice Question ({shot}, {lang}) Leaderboard",
                            f"tab:{dataset}_{shot}_{lang}",
                        )
                        for shot in ('Zeroshot', 'Fewshot')
                    ]
                else:
                    # Question-answering tables have no shot setting, so the
                    # hidden "shot" field is an empty string rather than a
                    # value left over from an earlier multiple-choice table.
                    sections = [
                        (
                            'Question Answering',
                            "",
                            f"{display_name} Question Answering ({lang}) Leaderboard",
                            f"tab:{dataset}_{lang}",
                        )
                    ]

                for section_title, shot, caption, label in sections:
                    with gr.Tab(section_title):
                        dataframe_to_gradio(df, is_mc=is_mc, shot=shot or None)
                        (latex_button, latex_textbox, csv_button,
                         download_link, params) = _add_export_controls(
                            caption, label, lang, dataset, cat, shot, display_name)
                    latex_btn_list.append(latex_button)
                    latex_textbox_list.append(latex_textbox)
                    csv_btn_list.append(csv_button)
                    csv_textbox_list.append(download_link)
                    export_params.append(params)
        tab_list.append(tab)

    return tab_list, latex_btn_list, latex_textbox_list, csv_btn_list, csv_textbox_list, export_params
def get_latest_modification_date():
    """Return the newest modification time among the dataset CSV files.

    Looks at every ``.csv`` entry directly inside
    ``config['dataset']['dataset_dir']``. When there is none, the timestamp
    stays at 0 seconds.

    Returns:
        The timestamp formatted as ``YYYY-MM-DD HH:MM:SS``.
    """
    data_dir = config['dataset']['dataset_dir']
    csv_mtimes = (
        os.path.getmtime(os.path.join(data_dir, entry))
        for entry in os.listdir(data_dir)
        if entry.endswith('.csv')
    )
    newest = max([0, *csv_mtimes])
    return pd.to_datetime(newest, unit='s').strftime("%Y-%m-%d %H:%M:%S")
# UI strings per display language ('zh' / 'en'). The 'lb_sec' headings embed
# the dataset modification time as of the moment this module is imported.
translation_dict = dict(
    zh=dict(
        intro=ZH_INTRODUCTION_TEXT,
        title=ZH_TITLE,
        lb_sec=f"""# 🏅 排行榜 \n 更新时间: {get_latest_modification_date()}\n""",
    ),
    en=dict(
        intro=INTRODUCTION_TEXT,
        title=TITLE,
        lb_sec=f"""# 🏅 Leaderboard \n Latest update: {get_latest_modification_date()}\n""",
    ),
)
def get_language_lb(language):
    """Rebuild the leaderboard components of every language tab.

    Args:
        language: Display language forwarded to ``create_lang_leader_board``.

    Returns:
        Dict keyed by 'English' and 'Chinese'; each value is the 6-tuple
        returned by ``create_lang_leader_board`` for that tab.
    """
    tab_dict = {'English': None, 'Chinese': None}
    for tab_name, frames in dict_lang.items():
        tab_dict[tab_name] = tuple(create_lang_leader_board(frames, tab_name, language))
    return tab_dict
def switch_language(language):
    """Produce the values needed to show the page in another display language.

    Args:
        language: Key into ``translation_dict`` ('en' or 'zh').

    Returns:
        A flat tuple: title, introduction, leaderboard heading, the English
        dataset tabs, the Chinese dataset tabs, and finally ``language``.
    """
    boards = get_language_lb(language)
    texts = translation_dict[language]
    return (
        texts['title'],
        texts['intro'],
        texts['lb_sec'],
        *boards['English'][0],
        *boards['Chinese'][0],
        language,
    )
def get_lb_body(language='en'):
    """Build the leaderboard body with one top-level tab per language.

    Args:
        language: Display language forwarded to ``create_lang_leader_board``.

    Returns:
        ``(body, tab_dict)`` where ``body`` is the ``gr.Blocks`` container and
        ``tab_dict`` maps 'English' / 'Chinese' to the 6-tuple returned by
        ``create_lang_leader_board`` for that tab.
    """
    tab_dict = {'English': None, 'Chinese': None}
    with gr.Blocks() as body:
        for tab_name, frames in dict_lang.items():
            with gr.Tab(tab_name):
                tab_dict[tab_name] = tuple(create_lang_leader_board(frames, tab_name, language))
    return body, tab_dict
def launch_gradio():
    """Assemble the whole leaderboard page and wire up its event handlers.

    Returns:
        The ``gr.Blocks`` app (not yet launched).
    """
    demo = gr.Blocks()
    with demo:
        lang_state = gr.State("en")
        with gr.Row():
            en_button = gr.Button("English", variant="primary")
            zh_button = gr.Button("中文", variant="primary")
        title = gr.HTML(TITLE)
        intro = gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
        leaderboard_section = gr.Markdown(
            f"""# 🏅 Leaderboard \n Latest update: {get_latest_modification_date()}\n""",
            elem_classes="markdown-text",
        )
        _body, tab_dict = get_lb_body(language=lang_state.value)

        # Hook every export button to its handler, feeding it the hidden
        # parameter fields created next to the button.
        latex_keys = ("caption", "label", "lang", "dataset", "cat")
        csv_keys = ("lang", "dataset", "cat", "shot", "dataset_name")
        for lang in ('English', 'Chinese'):
            _tabs, latex_buttons, latex_boxes, csv_buttons, csv_files, export_params = tab_dict[lang]
            for button, params, box in zip(latex_buttons, export_params, latex_boxes):
                button.click(
                    pop_latex_table,
                    inputs=[params[key] for key in latex_keys],
                    outputs=[box],
                )
            for button, params, file_slot in zip(csv_buttons, export_params, csv_files):
                button.click(
                    generate_csv,
                    inputs=[params[key] for key in csv_keys],
                    outputs=[file_slot],
                )

        # Both language buttons update the same components: the three text
        # sections, every dataset tab, and the language state.
        switch_outputs = [
            title,
            intro,
            leaderboard_section,
            *tab_dict['English'][0],
            *tab_dict['Chinese'][0],
            lang_state,
        ]
        for button, code in ((en_button, "en"), (zh_button, "zh")):
            button.click(
                switch_language,
                inputs=[gr.State(code)],
                outputs=list(switch_outputs),
                postprocess=False,
            )
    return demo
# Print floats with two decimals whenever pandas renders a frame as text.
pd.set_option('display.float_format', '{:.02f}'.format)
# Call launch_gradio once per hour on a background scheduler.
# NOTE(review): nothing in this file captures what the scheduled call returns,
# so the hourly run does not appear to replace the `demo` served below -
# confirm whether a refresh of the running app was intended.
scheduler = BackgroundScheduler()
scheduler.add_job(launch_gradio, 'interval', hours=1)
scheduler.start()
# Build the app once and start serving it.
demo = launch_gradio()
demo.launch()