Spaces:
Running
Running
Upload folder using huggingface_hub
Browse files
- app/draw_diagram.py +14 -13
- app/pages.py +10 -7
app/draw_diagram.py
CHANGED
|
@@ -65,19 +65,20 @@ def draw(folder_name, category_one, category_two, sort, num_sort, model_size_ran
|
|
| 65 |
max_value = round(max(chart_data.iloc[:, 1]) + 0.1*max(chart_data.iloc[:, 1]), 1)
|
| 66 |
|
| 67 |
display_names = {
|
| 68 |
-
'cross_mmlu'
|
| 69 |
-
'cross_mmlu_no_prompt'
|
| 70 |
-
'cross_logiqa'
|
| 71 |
-
'cross_logiqa_no_prompt': 'Cross-LogiQA-No-Prompt',
|
| 72 |
-
'cross_xquad'
|
| 73 |
-
'cross_xquad_no_prompt'
|
| 74 |
-
'sg_eval'
|
| 75 |
-
'sg_eval_v1_cleaned'
|
| 76 |
-
'sg_eval_v2_mcq'
|
| 77 |
-
'
|
| 78 |
-
'
|
| 79 |
-
'
|
| 80 |
-
'
|
|
|
|
| 81 |
}
|
| 82 |
|
| 83 |
data_columns = [i for i in chart_data.columns if i not in ['Model', 'model_show']]
|
|
|
|
| 65 |
max_value = round(max(chart_data.iloc[:, 1]) + 0.1*max(chart_data.iloc[:, 1]), 1)
|
| 66 |
|
| 67 |
display_names = {
|
| 68 |
+
'cross_mmlu' : 'Cross-MMLU',
|
| 69 |
+
'cross_mmlu_no_prompt' : 'Cross-MMLU-No-Prompt',
|
| 70 |
+
'cross_logiqa' : 'Cross-LogiQA',
|
| 71 |
+
'cross_logiqa_no_prompt' : 'Cross-LogiQA-No-Prompt',
|
| 72 |
+
'cross_xquad' : 'Cross-XQUAD',
|
| 73 |
+
'cross_xquad_no_prompt' : 'Cross-XQUAD-No-Prompt',
|
| 74 |
+
'sg_eval' : 'SG EVAL',
|
| 75 |
+
'sg_eval_v1_cleaned' : 'SG EVAL V1 Cleaned',
|
| 76 |
+
'sg_eval_v2_mcq' : 'SG EVAL V2 MCQ',
|
| 77 |
+
'sg_eval_v2_mcq_no_prompt': 'SG EVAL V2 MCQ No Prompt',
|
| 78 |
+
'sg_eval_v2_open' : 'SG EVAL V2 Open Ended',
|
| 79 |
+
'us_eval' : 'US EVAL',
|
| 80 |
+
'cn_eval' : 'CN EVAL',
|
| 81 |
+
'ph_eval' : 'PH EVAL'
|
| 82 |
}
|
| 83 |
|
| 84 |
data_columns = [i for i in chart_data.columns if i not in ['Model', 'model_show']]
|
app/pages.py
CHANGED
|
@@ -126,6 +126,7 @@ def cultural_reasoning():
|
|
| 126 |
filters_levelone = ['Zero Shot', 'Few Shot']
|
| 127 |
filters_leveltwo = [
|
| 128 |
'SG EVAL V2 MCQ',
|
|
|
|
| 129 |
'SG EVAL V2 Open Ended',
|
| 130 |
'SG EVAL',
|
| 131 |
'SG EVAL V1 Cleaned',
|
|
@@ -138,13 +139,15 @@ def cultural_reasoning():
|
|
| 138 |
'Few Shot': 'few_shot'
|
| 139 |
}
|
| 140 |
|
| 141 |
-
category_two_dict = {
|
| 142 |
-
'SG EVAL
|
| 143 |
-
'SG EVAL
|
| 144 |
-
'SG EVAL V2
|
| 145 |
-
'
|
| 146 |
-
'
|
| 147 |
-
'
|
|
|
|
|
|
|
| 148 |
}
|
| 149 |
|
| 150 |
left, center, middle, _, right = st.columns([0.2, 0.2, 0.2, 0.2 ,0.2])
|
|
|
|
| 126 |
filters_levelone = ['Zero Shot', 'Few Shot']
|
| 127 |
filters_leveltwo = [
|
| 128 |
'SG EVAL V2 MCQ',
|
| 129 |
+
'SG EVAL V2 MCQ No Prompt',
|
| 130 |
'SG EVAL V2 Open Ended',
|
| 131 |
'SG EVAL',
|
| 132 |
'SG EVAL V1 Cleaned',
|
|
|
|
| 139 |
'Few Shot': 'few_shot'
|
| 140 |
}
|
| 141 |
|
| 142 |
+
category_two_dict = {
|
| 143 |
+
'SG EVAL' : 'sg_eval',
|
| 144 |
+
'SG EVAL V1 Cleaned' : 'sg_eval_v1_cleaned',
|
| 145 |
+
'SG EVAL V2 MCQ' : 'sg_eval_v2_mcq',
|
| 146 |
+
'SG EVAL V2 MCQ No Prompt': 'sg_eval_v2_mcq_no_prompt',
|
| 147 |
+
'SG EVAL V2 Open Ended' : 'sg_eval_v2_open',
|
| 148 |
+
'US EVAL' : 'us_eval',
|
| 149 |
+
'CN EVAL' : 'cn_eval',
|
| 150 |
+
'PH EVAL' : 'ph_eval'
|
| 151 |
}
|
| 152 |
|
| 153 |
left, center, middle, _, right = st.columns([0.2, 0.2, 0.2, 0.2 ,0.2])
|