Spaces:

davidkim205
/

ko-bench

Sleeping

App Files Files Community

davidkim205 committed on Aug 7, 2024

Commit

2d3d046

1 Parent(s): 7172210

update

Browse files

Files changed (1) hide show

app.py +14 -14

app.py CHANGED Viewed

@@ -62,7 +62,7 @@ def get_license(model): # 대소문자 무시하고 모델을 매칭하기 위
 # dataframe_full
 df_full_rs = df_rs.copy()
-df_full_rs.rename(columns={'score': 'KO-Bench'}, inplace=True)
 df_full_rs = df_full_rs.drop(columns=['Coding', 'Extraction', 'Humanities', 'Math', 'Reasoning', 'Roleplay', 'STEM', 'Writing'])
 df_full_rs = df_full_rs.drop(columns=['turn']) # 모델별 turn1,2 score 합병
@@ -70,16 +70,16 @@ df_full_rs = df_full_rs.groupby(['model', 'judge_model']).agg({col: custom_mean
 df_full_rs = df_full_rs.round(2)
 df_full_rs.replace("", np.nan, inplace=True)
-df_full_rs['KO-Bench/openai'] = '' # KO-Bench/openai, KO-Bench/keval 열 추가
-df_full_rs['KO-Bench/keval'] = ''
 for idx, j_model in df_full_rs['judge_model'].items():
     if j_model == 'keval':
-        df_full_rs.at[idx, 'KO-Bench/keval'] = df_full_rs.at[idx, 'KO-Bench']
     else :
-        df_full_rs.at[idx, 'KO-Bench/openai'] = df_full_rs.at[idx, 'KO-Bench']
 df_full_rs = df_full_rs.drop(columns=['judge_model'])
-df_full_rs = df_full_rs.groupby(['model']).agg({col: custom_mean for col in df_full_rs.columns if col not in ['model']}).reset_index() # KO-Bench/openai, KO-Bench/keval 행 합병
 df_full_rs = df_full_rs.round(2)
 df_full_rs.replace("", np.nan, inplace=True)
@@ -93,9 +93,9 @@ df_full_rs['Organization'] = df_full_rs['model'].apply(get_organization)
 df_full_rs['License'] = '' # License 열 추가
 df_full_rs['License'] = df_full_rs['model'].apply(get_license)
-df_full_rs = df_full_rs.sort_values(by='KO-Bench', ascending=False)
 df_full_rs.insert(0, 'rank', range(1, len(df_full_rs) + 1))
-df_full_rs = df_full_rs.drop(columns=['KO-Bench'])
 plot_models = df_full_rs['model'].unique() # model detail view를 위한 models 리스트
@@ -230,18 +230,18 @@ def search_keval_plot(dropdown_model): # keval plot 함수 정의
 #gradio
 with gr.Blocks() as demo:
     gr.Markdown("")
-    gr.Markdown("# 🏆 KO-Bench Leaderboard")
     gr.Markdown("")
-    gr.Markdown("#### The Ko-bench is a leaderboard for evaluating the multi-level conversation ability and instruction-following ability of Korean Large Language Models (LLMs).")
     gr.Markdown("- MT-Bench: a set of challenging multi-turn questions. We use GPT-4 to grade the model responses.")
-    gr.Markdown("- KO-Bench/openai: a set of challenging multi-turn questions in Korean. We use GPT-4o to grade the model responses.")
-    gr.Markdown("- KO-Bench/keval: a set of challenging multi-turn questions in Korean. We use the keval model as an evaluation model.")
     gr.Markdown("")
-    gr.Markdown("github : https://github.com/davidkim205/ko-bench")
     gr.Markdown("keval : https://huggingface.co/collections/davidkim205/k-eval-6660063dd66e21cbdcc4fbf1")
     gr.Markdown("")
-    with gr.TabItem("KO-Bench"):
         gr.Dataframe(value=df_full_rs)
     with gr.TabItem("Openai Judgment"):
         gr.Dataframe(value=df_openai)

 # dataframe_full
 df_full_rs = df_rs.copy()
+df_full_rs.rename(columns={'score': 'Ko-Bench'}, inplace=True)
 df_full_rs = df_full_rs.drop(columns=['Coding', 'Extraction', 'Humanities', 'Math', 'Reasoning', 'Roleplay', 'STEM', 'Writing'])
 df_full_rs = df_full_rs.drop(columns=['turn']) # 모델별 turn1,2 score 합병
 df_full_rs = df_full_rs.round(2)
 df_full_rs.replace("", np.nan, inplace=True)
+df_full_rs['Ko-Bench/openai'] = '' # Ko-Bench/openai, Ko-Bench/keval 열 추가
+df_full_rs['Ko-Bench/keval'] = ''
 for idx, j_model in df_full_rs['judge_model'].items():
     if j_model == 'keval':
+        df_full_rs.at[idx, 'Ko-Bench/keval'] = df_full_rs.at[idx, 'Ko-Bench']
     else :
+        df_full_rs.at[idx, 'Ko-Bench/openai'] = df_full_rs.at[idx, 'Ko-Bench']
 df_full_rs = df_full_rs.drop(columns=['judge_model'])
+df_full_rs = df_full_rs.groupby(['model']).agg({col: custom_mean for col in df_full_rs.columns if col not in ['model']}).reset_index() # Ko-Bench/openai, Ko-Bench/keval 행 합병
 df_full_rs = df_full_rs.round(2)
 df_full_rs.replace("", np.nan, inplace=True)
 df_full_rs['License'] = '' # License 열 추가
 df_full_rs['License'] = df_full_rs['model'].apply(get_license)
+df_full_rs = df_full_rs.sort_values(by='Ko-Bench', ascending=False)
 df_full_rs.insert(0, 'rank', range(1, len(df_full_rs) + 1))
+df_full_rs = df_full_rs.drop(columns=['Ko-Bench'])
 plot_models = df_full_rs['model'].unique() # model detail view를 위한 models 리스트
 #gradio
 with gr.Blocks() as demo:
     gr.Markdown("")
+    gr.Markdown("# 🏆 Ko-Bench Leaderboard")
     gr.Markdown("")
+    gr.Markdown("#### The Ko-Bench is a leaderboard for evaluating the multi-level conversation ability and instruction-following ability of Korean Large Language Models (LLMs).")
     gr.Markdown("- MT-Bench: a set of challenging multi-turn questions. We use GPT-4 to grade the model responses.")
+    gr.Markdown("- Ko-Bench/openai: a set of challenging multi-turn questions in Korean. We use GPT-4o to grade the model responses.")
+    gr.Markdown("- Ko-Bench/keval: a set of challenging multi-turn questions in Korean. We use the keval model as an evaluation model.")
     gr.Markdown("")
+    gr.Markdown("github : https://github.com/davidkim205/Ko-Bench")
     gr.Markdown("keval : https://huggingface.co/collections/davidkim205/k-eval-6660063dd66e21cbdcc4fbf1")
     gr.Markdown("")
+    with gr.TabItem("Ko-Bench"):
         gr.Dataframe(value=df_full_rs)
     with gr.TabItem("Openai Judgment"):
         gr.Dataframe(value=df_openai)