Spaces:
Runtime error
Runtime error
michal
committed on
Commit
·
c5afbf5
1
Parent(s):
64298d7
Upload
Browse files- app.py +120 -0
- leaderboards/all_types_years.json +0 -0
- leaderboards/llmzszl.json +410 -0
- src/abouts.py +90 -0
- src/envs.py +25 -0
- src/images/logo.png +0 -0
- src/structures/gim.py +42 -0
- src/structures/leaderboard_structure.py +53 -0
- src/structures/mat.py +42 -0
- src/structures/osm.py +42 -0
- src/structures/zaw.py +42 -0
- src/styles.py +108 -0
- src/utils.py +1 -0
app.py
ADDED
|
@@ -0,0 +1,120 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
from gradio_leaderboard import Leaderboard
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
import pandas as pd
|
| 5 |
+
from src.styles import custom_css
|
| 6 |
+
|
| 7 |
+
from src.structures.leaderboard_structure import (LB_LLMZSZL,
|
| 8 |
+
ORDER_LIST,
|
| 9 |
+
DATA_TYPES,
|
| 10 |
+
COLUMN_HEADERS,
|
| 11 |
+
filter_data,
|
| 12 |
+
filter_columns,
|
| 13 |
+
)
|
| 14 |
+
from src.structures.gim import GIM_SCORES
|
| 15 |
+
from src.structures.zaw import ZAW_SCORES
|
| 16 |
+
from src.structures.mat import MAT_SCORES
|
| 17 |
+
from src.structures.osm import OSM_SCORES
|
| 18 |
+
|
| 19 |
+
# Entry script: builds the Gradio Blocks UI for the LLMzSzŁ leaderboard (logo and
# title, one tab per score table, an About tab and a citation box) and launches
# it when the file is run as a script.
# NOTE(review): indentation was lost in this rendering of the file, so the comments
# below describe each statement without asserting how the `with` blocks nest.
# NOTE(review): a `global` statement at module scope has no effect; the name
# `data_component` is module-level either way.
global data_component
|
| 20 |
+
# The wildcard import supplies HEADER_TITLE, GIM_DESC, OSM_DESC, MAT_DESC,
# ZAW_DESC and ABOUT, which are used below.
from src.abouts import *
|
| 21 |
+
|
| 22 |
+
# Root Blocks container, styled with the CSS string from src.styles.
main = gr.Blocks(css=custom_css)
|
| 23 |
+
with main:
|
| 24 |
+
with gr.Row():
|
| 25 |
+
with gr.Column():
|
| 26 |
+
# Logo image with the download, share and fullscreen buttons switched off.
image = gr.Image("src/images/logo.png",
|
| 27 |
+
show_download_button=False,
|
| 28 |
+
show_share_button=False,
|
| 29 |
+
show_fullscreen_button=False,
|
| 30 |
+
container=False)
|
| 31 |
+
with gr.Column():
|
| 32 |
+
# Title/subtitle HTML defined in src.abouts.
gr.HTML(HEADER_TITLE)
|
| 33 |
+
with gr.Tabs(elem_classes="tab-buttons") as tabs:
|
| 34 |
+
# Tab 1: overall leaderboard with a column picker.
with gr.Tab("🏅 LLMZSZL"):
|
| 35 |
+
# NOTE(review): same text as MAIN_DESC in src.abouts, written out literally here.
gr.Markdown("""## Overall scores""")
|
| 36 |
+
# Checkbox group to toggle column visibility; every column in ORDER_LIST
# is offered and all of them start selected.
|
| 37 |
+
columns_selector = gr.CheckboxGroup(
|
| 38 |
+
choices=ORDER_LIST,
|
| 39 |
+
label="Select columns to display",
|
| 40 |
+
value=ORDER_LIST,
|
| 41 |
+
)
|
| 42 |
+
# Dataframe component to display the leaderboard data. This is the only
# table passed to an event handler (see columns_selector.change below).
|
| 43 |
+
data_component = gr.components.Dataframe(
|
| 44 |
+
value=LB_LLMZSZL,
|
| 45 |
+
headers=COLUMN_HEADERS,
|
| 46 |
+
type="pandas",
|
| 47 |
+
datatype=DATA_TYPES,
|
| 48 |
+
interactive=False,
|
| 49 |
+
visible=True,
|
| 50 |
+
# Five widths, one per entry of ORDER_LIST.
column_widths=[400, 200, 100, 120, 100]
|
| 51 |
+
)
|
| 52 |
+
# NOTE(review): disabled language-filter wiring; `language_selector` is not
# defined anywhere in this script.
# def update_data(selected_columns, selected_languages):
|
| 53 |
+
# return update_dataframe(selected_columns, selected_languages)
|
| 54 |
+
|
| 55 |
+
# Callback for the checkbox group: returns the leaderboard restricted to the
# ticked columns by delegating to filter_columns.
def update_dataframe(selected_columns):
|
| 56 |
+
return filter_columns(selected_columns)
|
| 57 |
+
|
| 58 |
+
# `data_component` is evaluated here, while it still refers to the overall
# leaderboard table; the later rebindings do not affect this wiring.
columns_selector.change(update_dataframe, inputs=columns_selector, outputs=data_component)
|
| 59 |
+
# language_selector.change(update_data, inputs=[columns_selector, language_selector], outputs=data_component)
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
# Tab 2: middle-school exam scores.
with gr.Tab("📝 Middle School exam"):
|
| 63 |
+
gr.Markdown(GIM_DESC)
|
| 64 |
+
# NOTE(review): rebinds `data_component`; the new table has no event handlers.
data_component = gr.components.Dataframe(
|
| 65 |
+
value=GIM_SCORES,
|
| 66 |
+
type="pandas",
|
| 67 |
+
interactive=False,
|
| 68 |
+
visible=True,
|
| 69 |
+
# One markdown column followed by 18 numeric ones.
# NOTE(review): 18 presumably has to match the number of score columns in
# GIM_SCORES (src/structures/gim.py also hard-codes 18) -- confirm against the data.
datatype=["markdown"]+["number"]*18,
|
| 70 |
+
column_widths=[400] + [80] * 18
|
| 71 |
+
)
|
| 72 |
+
|
| 73 |
+
# Tab 3: 8th-grade exam scores.
with gr.Tab("📝 8-grade exam"):
|
| 74 |
+
gr.Markdown(OSM_DESC)
|
| 75 |
+
data_component = gr.components.Dataframe(
|
| 76 |
+
value=OSM_SCORES,
|
| 77 |
+
type="pandas",
|
| 78 |
+
interactive=False,
|
| 79 |
+
visible=True,
|
| 80 |
+
# One markdown column followed by 5 numeric ones.
datatype=["markdown"]+["number"]*5,
|
| 81 |
+
column_widths=[400] + [80] * 5
|
| 82 |
+
)
|
| 83 |
+
|
| 84 |
+
# Tab 4: high-school (matura) exam scores.
with gr.Tab("📝 High School exam"):
|
| 85 |
+
gr.Markdown(MAT_DESC)
|
| 86 |
+
data_component = gr.components.Dataframe(
|
| 87 |
+
value=MAT_SCORES,
|
| 88 |
+
type="pandas",
|
| 89 |
+
interactive=False,
|
| 90 |
+
visible=True,
|
| 91 |
+
# One markdown column followed by 22 numeric ones.
datatype=["markdown"]+["number"]*22,
|
| 92 |
+
column_widths=[400] + [80] * 22
|
| 93 |
+
)
|
| 94 |
+
|
| 95 |
+
# Tab 5: professional (vocational) exam scores.
with gr.Tab("📝 Professional exam"):
|
| 96 |
+
gr.Markdown(ZAW_DESC)
|
| 97 |
+
data_component = gr.components.Dataframe(
|
| 98 |
+
value=ZAW_SCORES,
|
| 99 |
+
type="pandas",
|
| 100 |
+
interactive=False,
|
| 101 |
+
visible=True,
|
| 102 |
+
# One markdown column followed by 12 numeric ones.
datatype=["markdown"]+["number"]*12,
|
| 103 |
+
column_widths=[400] + [80] * 12
|
| 104 |
+
)
|
| 105 |
+
|
| 106 |
+
# Tab 6: About text from src.abouts.
with gr.Tab("📝 About"):
|
| 107 |
+
gr.Markdown(ABOUT)
|
| 108 |
+
|
| 109 |
+
with gr.Column():
|
| 110 |
+
# Collapsed-by-default accordion holding the citation text box.
with gr.Accordion("📙 Citation", open=False):
|
| 111 |
+
# NOTE(review): value and label look like placeholders ("TEST" / "TEST_LABEL").
citation_button = gr.Textbox(
|
| 112 |
+
value="TEST",
|
| 113 |
+
label="TEST_LABEL",
|
| 114 |
+
lines=20,
|
| 115 |
+
elem_id="citation-button",
|
| 116 |
+
show_copy_button=True,
|
| 117 |
+
)
|
| 118 |
+
|
| 119 |
+
# Start the app only when this file is executed directly.
if __name__ == "__main__":
|
| 120 |
+
main.launch()
|
leaderboards/all_types_years.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
leaderboards/llmzszl.json
ADDED
|
@@ -0,0 +1,410 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"Lang": "E",
|
| 4 |
+
"Family": "Llama",
|
| 5 |
+
"Name": "meta-llama/Llama-2-7b-hf",
|
| 6 |
+
"Parameters (B)": 7,
|
| 7 |
+
"Date": "23-07",
|
| 8 |
+
"Score": 28.04
|
| 9 |
+
},
|
| 10 |
+
{
|
| 11 |
+
"Lang": "E",
|
| 12 |
+
"Family": "Llama",
|
| 13 |
+
"Name": "meta-llama/Llama-2-13b-hf",
|
| 14 |
+
"Parameters (B)": 13,
|
| 15 |
+
"Date": "23-07",
|
| 16 |
+
"Score": 33.85
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"Lang": "E",
|
| 20 |
+
"Family": "Llama",
|
| 21 |
+
"Name": "meta-llama/Llama-2-70b-hf",
|
| 22 |
+
"Parameters (B)": 70,
|
| 23 |
+
"Date": "23-07",
|
| 24 |
+
"Score": 46.38
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"Lang": "E",
|
| 28 |
+
"Family": "Phi",
|
| 29 |
+
"Name": "microsoft/phi-1",
|
| 30 |
+
"Parameters (B)": 1,
|
| 31 |
+
"Date": "24-04",
|
| 32 |
+
"Score": 25.73
|
| 33 |
+
},
|
| 34 |
+
{
|
| 35 |
+
"Lang": "E",
|
| 36 |
+
"Family": "Phi",
|
| 37 |
+
"Name": "microsoft/phi-1_5",
|
| 38 |
+
"Parameters (B)": 1,
|
| 39 |
+
"Date": "24-04",
|
| 40 |
+
"Score": 24.25
|
| 41 |
+
},
|
| 42 |
+
{
|
| 43 |
+
"Lang": "E",
|
| 44 |
+
"Family": "Phi",
|
| 45 |
+
"Name": "microsoft/phi-2",
|
| 46 |
+
"Parameters (B)": 3,
|
| 47 |
+
"Date": "24-01",
|
| 48 |
+
"Score": 25.6
|
| 49 |
+
},
|
| 50 |
+
{
|
| 51 |
+
"Lang": "E",
|
| 52 |
+
"Family": "Phi",
|
| 53 |
+
"Name": "microsoft/Phi-3-mini-4k-instruct",
|
| 54 |
+
"Parameters (B)": 4,
|
| 55 |
+
"Date": "24-07",
|
| 56 |
+
"Score": 33.44
|
| 57 |
+
},
|
| 58 |
+
{
|
| 59 |
+
"Lang": "E",
|
| 60 |
+
"Family": "Qwen",
|
| 61 |
+
"Name": "Qwen/Qwen2-1.5B",
|
| 62 |
+
"Parameters (B)": 1.5,
|
| 63 |
+
"Date": "24-05",
|
| 64 |
+
"Score": 34.19
|
| 65 |
+
},
|
| 66 |
+
{
|
| 67 |
+
"Lang": "E",
|
| 68 |
+
"Family": "Qwen",
|
| 69 |
+
"Name": "Qwen/Qwen2-7B",
|
| 70 |
+
"Parameters (B)": 7,
|
| 71 |
+
"Date": "24-06",
|
| 72 |
+
"Score": 45.59
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"Lang": "E",
|
| 76 |
+
"Family": "gemma",
|
| 77 |
+
"Name": "google/gemma-7b",
|
| 78 |
+
"Parameters (B)": 7,
|
| 79 |
+
"Date": "24-02",
|
| 80 |
+
"Score": 46.84
|
| 81 |
+
},
|
| 82 |
+
{
|
| 83 |
+
"Lang": "P",
|
| 84 |
+
"Family": "Bielik",
|
| 85 |
+
"Name": "speakleash/Bielik-7B-v0.1",
|
| 86 |
+
"Parameters (B)": 7,
|
| 87 |
+
"Date": "24-03",
|
| 88 |
+
"Score": 39.15
|
| 89 |
+
},
|
| 90 |
+
{
|
| 91 |
+
"Lang": "P",
|
| 92 |
+
"Family": "Bielik",
|
| 93 |
+
"Name": "speakleash/Bielik-7B-Instruct-v0.1",
|
| 94 |
+
"Parameters (B)": 7,
|
| 95 |
+
"Date": "24-03",
|
| 96 |
+
"Score": 40.77
|
| 97 |
+
},
|
| 98 |
+
{
|
| 99 |
+
"Lang": "P",
|
| 100 |
+
"Family": "Bielik",
|
| 101 |
+
"Name": "speakleash/Bielik-11B-v2",
|
| 102 |
+
"Parameters (B)": 11,
|
| 103 |
+
"Date": "24-08",
|
| 104 |
+
"Score": 55.14
|
| 105 |
+
},
|
| 106 |
+
{
|
| 107 |
+
"Lang": "P",
|
| 108 |
+
"Family": "Bielik",
|
| 109 |
+
"Name": "speakleash/Bielik-11B-v2.0-Instruct",
|
| 110 |
+
"Parameters (B)": 11,
|
| 111 |
+
"Date": "24-08",
|
| 112 |
+
"Score": 55.61
|
| 113 |
+
},
|
| 114 |
+
{
|
| 115 |
+
"Lang": "P",
|
| 116 |
+
"Family": "Bielik",
|
| 117 |
+
"Name": "speakleash/Bielik-11B-v2.1-Instruct",
|
| 118 |
+
"Parameters (B)": 11,
|
| 119 |
+
"Date": "24-08",
|
| 120 |
+
"Score": 57.52
|
| 121 |
+
},
|
| 122 |
+
{
|
| 123 |
+
"Lang": "P",
|
| 124 |
+
"Family": "Bielik",
|
| 125 |
+
"Name": "speakleash/Bielik-11B-v2.2-Instruct",
|
| 126 |
+
"Parameters (B)": 11,
|
| 127 |
+
"Date": "24-08",
|
| 128 |
+
"Score": 57.36
|
| 129 |
+
},
|
| 130 |
+
{
|
| 131 |
+
"Lang": "P",
|
| 132 |
+
"Family": "Qra",
|
| 133 |
+
"Name": "OPI-PG/Qra-1b",
|
| 134 |
+
"Parameters (B)": 1,
|
| 135 |
+
"Date": "24-02",
|
| 136 |
+
"Score": 25.47
|
| 137 |
+
},
|
| 138 |
+
{
|
| 139 |
+
"Lang": "P",
|
| 140 |
+
"Family": "Qra",
|
| 141 |
+
"Name": "OPI-PG/Qra-7b",
|
| 142 |
+
"Parameters (B)": 7,
|
| 143 |
+
"Date": "24-02",
|
| 144 |
+
"Score": 29.07
|
| 145 |
+
},
|
| 146 |
+
{
|
| 147 |
+
"Lang": "P",
|
| 148 |
+
"Family": "Qra",
|
| 149 |
+
"Name": "OPI-PG/Qra-13b",
|
| 150 |
+
"Parameters (B)": 13,
|
| 151 |
+
"Date": "24-02",
|
| 152 |
+
"Score": 34.85
|
| 153 |
+
},
|
| 154 |
+
{
|
| 155 |
+
"Lang": "P",
|
| 156 |
+
"Family": "polish-gpt2",
|
| 157 |
+
"Name": "sdadas/polish-gpt2-small",
|
| 158 |
+
"Parameters (B)": 0.2,
|
| 159 |
+
"Date": "22-09",
|
| 160 |
+
"Score": 24.19
|
| 161 |
+
},
|
| 162 |
+
{
|
| 163 |
+
"Lang": "P",
|
| 164 |
+
"Family": "polish-gpt2",
|
| 165 |
+
"Name": "sdadas/polish-gpt2-medium",
|
| 166 |
+
"Parameters (B)": 0.5,
|
| 167 |
+
"Date": "22-09",
|
| 168 |
+
"Score": 24.4
|
| 169 |
+
},
|
| 170 |
+
{
|
| 171 |
+
"Lang": "P",
|
| 172 |
+
"Family": "polish-gpt2",
|
| 173 |
+
"Name": "sdadas/polish-gpt2-large",
|
| 174 |
+
"Parameters (B)": 0.9,
|
| 175 |
+
"Date": "23-01",
|
| 176 |
+
"Score": 24.89
|
| 177 |
+
},
|
| 178 |
+
{
|
| 179 |
+
"Lang": "P",
|
| 180 |
+
"Family": "polish-gpt2",
|
| 181 |
+
"Name": "sdadas/polish-gpt2-xl",
|
| 182 |
+
"Parameters (B)": 2,
|
| 183 |
+
"Date": "23-01",
|
| 184 |
+
"Score": 23.98
|
| 185 |
+
},
|
| 186 |
+
{
|
| 187 |
+
"Lang": "P",
|
| 188 |
+
"Family": "trurl",
|
| 189 |
+
"Name": "Voicelab/trurl-2-7b-8bit",
|
| 190 |
+
"Parameters (B)": 7,
|
| 191 |
+
"Date": "23-08",
|
| 192 |
+
"Score": 31.86
|
| 193 |
+
},
|
| 194 |
+
{
|
| 195 |
+
"Lang": "P",
|
| 196 |
+
"Family": "trurl",
|
| 197 |
+
"Name": "Voicelab/trurl-2-7b",
|
| 198 |
+
"Parameters (B)": 7,
|
| 199 |
+
"Date": "23-08",
|
| 200 |
+
"Score": 32.3
|
| 201 |
+
},
|
| 202 |
+
{
|
| 203 |
+
"Lang": "P",
|
| 204 |
+
"Family": "trurl",
|
| 205 |
+
"Name": "Voicelab/trurl-2-13b",
|
| 206 |
+
"Parameters (B)": 13,
|
| 207 |
+
"Date": "23-08",
|
| 208 |
+
"Score": 40.22
|
| 209 |
+
},
|
| 210 |
+
{
|
| 211 |
+
"Lang": "P",
|
| 212 |
+
"Family": "trurl",
|
| 213 |
+
"Name": "Voicelab/trurl-2-13b-8bit",
|
| 214 |
+
"Parameters (B)": 13,
|
| 215 |
+
"Date": "23-08",
|
| 216 |
+
"Score": 40.23
|
| 217 |
+
},
|
| 218 |
+
{
|
| 219 |
+
"Lang": "P",
|
| 220 |
+
"Family": "trurl",
|
| 221 |
+
"Name": "Voicelab/trurl-2-13b-academic",
|
| 222 |
+
"Parameters (B)": 13,
|
| 223 |
+
"Date": "23-08",
|
| 224 |
+
"Score": 34.89
|
| 225 |
+
},
|
| 226 |
+
{
|
| 227 |
+
"Lang": "m",
|
| 228 |
+
"Family": "Llama",
|
| 229 |
+
"Name": "meta-llama/Meta-Llama-3-8B",
|
| 230 |
+
"Parameters (B)": 8,
|
| 231 |
+
"Date": "24-04",
|
| 232 |
+
"Score": 41.38
|
| 233 |
+
},
|
| 234 |
+
{
|
| 235 |
+
"Lang": "m",
|
| 236 |
+
"Family": "Llama",
|
| 237 |
+
"Name": "meta-llama/Meta-Llama-3-8B-Instruct",
|
| 238 |
+
"Parameters (B)": 8,
|
| 239 |
+
"Date": "24-04",
|
| 240 |
+
"Score": 44.83
|
| 241 |
+
},
|
| 242 |
+
{
|
| 243 |
+
"Lang": "m",
|
| 244 |
+
"Family": "Llama",
|
| 245 |
+
"Name": "meta-llama/Meta-Llama-3-70B",
|
| 246 |
+
"Parameters (B)": 70,
|
| 247 |
+
"Date": "24-04",
|
| 248 |
+
"Score": 62.22
|
| 249 |
+
},
|
| 250 |
+
{
|
| 251 |
+
"Lang": "m",
|
| 252 |
+
"Family": "Llama",
|
| 253 |
+
"Name": "meta-llama/Meta-Llama-3-70B-Instruct",
|
| 254 |
+
"Parameters (B)": 70,
|
| 255 |
+
"Date": "24-04",
|
| 256 |
+
"Score": 64.04
|
| 257 |
+
},
|
| 258 |
+
{
|
| 259 |
+
"Lang": "m",
|
| 260 |
+
"Family": "Llama",
|
| 261 |
+
"Name": "meta-llama/Meta-Llama-3.1-8B",
|
| 262 |
+
"Parameters (B)": 8,
|
| 263 |
+
"Date": "24-07",
|
| 264 |
+
"Score": 44.21
|
| 265 |
+
},
|
| 266 |
+
{
|
| 267 |
+
"Lang": "m",
|
| 268 |
+
"Family": "Llama",
|
| 269 |
+
"Name": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
| 270 |
+
"Parameters (B)": 8,
|
| 271 |
+
"Date": "24-07",
|
| 272 |
+
"Score": 47.41
|
| 273 |
+
},
|
| 274 |
+
{
|
| 275 |
+
"Lang": "m",
|
| 276 |
+
"Family": "Llama",
|
| 277 |
+
"Name": "meta-llama/Meta-Llama-3.1-70B-Instruct",
|
| 278 |
+
"Parameters (B)": 70,
|
| 279 |
+
"Date": "24-07",
|
| 280 |
+
"Score": 66.59
|
| 281 |
+
},
|
| 282 |
+
{
|
| 283 |
+
"Lang": "m",
|
| 284 |
+
"Family": "Mistral",
|
| 285 |
+
"Name": "mistralai/Mistral-7B-v0.1",
|
| 286 |
+
"Parameters (B)": 7,
|
| 287 |
+
"Date": "23-12",
|
| 288 |
+
"Score": 37.75
|
| 289 |
+
},
|
| 290 |
+
{
|
| 291 |
+
"Lang": "m",
|
| 292 |
+
"Family": "Mistral",
|
| 293 |
+
"Name": "mistralai/Mixtral-8x7B-Instruct-v0.1",
|
| 294 |
+
"Parameters (B)": 7,
|
| 295 |
+
"Date": "23-12",
|
| 296 |
+
"Score": 49.46
|
| 297 |
+
},
|
| 298 |
+
{
|
| 299 |
+
"Lang": "m",
|
| 300 |
+
"Family": "Mistral",
|
| 301 |
+
"Name": "mistralai/Mixtral-8x22B-Instruct-v0.1",
|
| 302 |
+
"Parameters (B)": 141,
|
| 303 |
+
"Date": "24-04",
|
| 304 |
+
"Score": 58.17
|
| 305 |
+
},
|
| 306 |
+
{
|
| 307 |
+
"Lang": "m",
|
| 308 |
+
"Family": "Mistral",
|
| 309 |
+
"Name": "mistralai/Mistral-7B-Instruct-v0.1",
|
| 310 |
+
"Parameters (B)": 7,
|
| 311 |
+
"Date": "23-12",
|
| 312 |
+
"Score": 35.98
|
| 313 |
+
},
|
| 314 |
+
{
|
| 315 |
+
"Lang": "m",
|
| 316 |
+
"Family": "Mistral",
|
| 317 |
+
"Name": "mistralai/Mistral-7B-Instruct-v0.2",
|
| 318 |
+
"Parameters (B)": 7,
|
| 319 |
+
"Date": "23-12",
|
| 320 |
+
"Score": 40.75
|
| 321 |
+
},
|
| 322 |
+
{
|
| 323 |
+
"Lang": "m",
|
| 324 |
+
"Family": "Mistral",
|
| 325 |
+
"Name": "mistralai/Mistral-7B-v0.3",
|
| 326 |
+
"Parameters (B)": 7,
|
| 327 |
+
"Date": "24-05",
|
| 328 |
+
"Score": 37.08
|
| 329 |
+
},
|
| 330 |
+
{
|
| 331 |
+
"Lang": "m",
|
| 332 |
+
"Family": "Mistral",
|
| 333 |
+
"Name": "mistralai/Mistral-7B-Instruct-v0.3",
|
| 334 |
+
"Parameters (B)": 7,
|
| 335 |
+
"Date": "24-05",
|
| 336 |
+
"Score": 41.72
|
| 337 |
+
},
|
| 338 |
+
{
|
| 339 |
+
"Lang": "m",
|
| 340 |
+
"Family": "Mistral",
|
| 341 |
+
"Name": "mistralai/Mistral-Large-Instruct-2407",
|
| 342 |
+
"Parameters (B)": 123,
|
| 343 |
+
"Date": "24-07",
|
| 344 |
+
"Score": 67.17
|
| 345 |
+
},
|
| 346 |
+
{
|
| 347 |
+
"Lang": "m",
|
| 348 |
+
"Family": "WizardLM",
|
| 349 |
+
"Name": "lucyknada/microsoft_WizardLM-2-7B",
|
| 350 |
+
"Parameters (B)": 7,
|
| 351 |
+
"Date": "24-04",
|
| 352 |
+
"Score": 38.23
|
| 353 |
+
},
|
| 354 |
+
{
|
| 355 |
+
"Lang": "m",
|
| 356 |
+
"Family": "Yi",
|
| 357 |
+
"Name": "01-ai/Yi-34B-Chat-4bits",
|
| 358 |
+
"Parameters (B)": 34,
|
| 359 |
+
"Date": "23-11",
|
| 360 |
+
"Score": 40.28
|
| 361 |
+
},
|
| 362 |
+
{
|
| 363 |
+
"Lang": "m",
|
| 364 |
+
"Family": "Yi",
|
| 365 |
+
"Name": "01-ai/Yi-34B-Chat",
|
| 366 |
+
"Parameters (B)": 34,
|
| 367 |
+
"Date": "23-11",
|
| 368 |
+
"Score": 41.42
|
| 369 |
+
},
|
| 370 |
+
{
|
| 371 |
+
"Lang": "m",
|
| 372 |
+
"Family": "Yi",
|
| 373 |
+
"Name": "01-ai/Yi-34B-200K",
|
| 374 |
+
"Parameters (B)": 34,
|
| 375 |
+
"Date": "24-03",
|
| 376 |
+
"Score": 37.56
|
| 377 |
+
},
|
| 378 |
+
{
|
| 379 |
+
"Lang": "m",
|
| 380 |
+
"Family": "Yi",
|
| 381 |
+
"Name": "01-ai/Yi-1.5-9B",
|
| 382 |
+
"Parameters (B)": 9,
|
| 383 |
+
"Date": "24-05",
|
| 384 |
+
"Score": 37.06
|
| 385 |
+
},
|
| 386 |
+
{
|
| 387 |
+
"Lang": "m",
|
| 388 |
+
"Family": "Yi",
|
| 389 |
+
"Name": "01-ai/Yi-1.5-9B-Chat",
|
| 390 |
+
"Parameters (B)": 9,
|
| 391 |
+
"Date": "24-05",
|
| 392 |
+
"Score": 37.59
|
| 393 |
+
},
|
| 394 |
+
{
|
| 395 |
+
"Lang": "m",
|
| 396 |
+
"Family": "Yi",
|
| 397 |
+
"Name": "01-ai/Yi-1.5-34B",
|
| 398 |
+
"Parameters (B)": 34,
|
| 399 |
+
"Date": "24-05",
|
| 400 |
+
"Score": 41.14
|
| 401 |
+
},
|
| 402 |
+
{
|
| 403 |
+
"Lang": "m",
|
| 404 |
+
"Family": "Yi",
|
| 405 |
+
"Name": "01-ai/Yi-1.5-34B-Chat",
|
| 406 |
+
"Parameters (B)": 34,
|
| 407 |
+
"Date": "24-05",
|
| 408 |
+
"Score": 41.47
|
| 409 |
+
}
|
| 410 |
+
]
|
src/abouts.py
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
HEADER_TITLE = """
|
| 2 |
+
<div style="display: flex; flex-wrap: wrap; justify-content: space-around; align-items: center;">
|
| 3 |
+
<div>
|
| 4 |
+
<h1 align="center" id="space-title">Adam Mickiewicz University's Center for Artificial Intelligence</h1>
|
| 5 |
+
<h2 align="center" id="space-subtitle">LLMzSzŁ: a comprehensive LLM benchmark for Polish</h2>
|
| 6 |
+
</div>
|
| 7 |
+
</div>
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
MAIN_DESC = """## Overall scores"""
|
| 11 |
+
|
| 12 |
+
GIM_DESC = """### Middle School Exam
|
| 13 |
+
|
| 14 |
+
The **Middle School Exam** (*egzamin gimnazjalny*) was formerly a key component of Poland’s secondary education system. This exam was taken by students completing their lower secondary education.
|
| 15 |
+
|
| 16 |
+
#### Subjects Covered
|
| 17 |
+
|
| 18 |
+
This exam assessed students in core subjects like:
|
| 19 |
+
|
| 20 |
+
- **Polish Language**
|
| 21 |
+
- **Mathematics**
|
| 22 |
+
- **Science**
|
| 23 |
+
- **Foreign Language**
|
| 24 |
+
|
| 25 |
+
preparing them for higher levels of education.
|
| 26 |
+
|
| 27 |
+
#### Recent Changes
|
| 28 |
+
|
| 29 |
+
Due to recent educational reforms in Poland:
|
| 30 |
+
|
| 31 |
+
- **Middle Schools Phased Out**: Middle schools have been removed from the education system.
|
| 32 |
+
- **Exam Discontinued**: With this change, the Middle School Exam has also been discontinued.
|
| 33 |
+
"""
|
| 34 |
+
|
| 35 |
+
MAT_DESC = """### High School Exam
|
| 36 |
+
|
| 37 |
+
The **High School Exam** (*matura*) is one of the most critical exams in Poland. This exam serves as a prerequisite for higher education and is a significant milestone in students' academic journeys.
|
| 38 |
+
|
| 39 |
+
#### Mandatory Subjects
|
| 40 |
+
|
| 41 |
+
The *matura* exam includes essential subjects:
|
| 42 |
+
|
| 43 |
+
- **Polish Language**
|
| 44 |
+
- **Mathematics**
|
| 45 |
+
- **Foreign Language**
|
| 46 |
+
|
| 47 |
+
#### Additional Subjects
|
| 48 |
+
|
| 49 |
+
Students may also choose additional subjects based on:
|
| 50 |
+
|
| 51 |
+
- **Areas of Interest**: Students select subjects aligned with their strengths or future studies.
|
| 52 |
+
- **University Requirements**: Specific subjects may be necessary for admission to certain university programs.
|
| 53 |
+
|
| 54 |
+
#### Minimum points required
|
| 55 |
+
To pass the High School Exam, students need to achieve a minimum score of **30%** in mandatory subjects. Additional subjects do not require a minimum score to pass, but the results significantly impact the points needed for university admission.
|
| 56 |
+
"""
|
| 57 |
+
|
| 58 |
+
OSM_DESC = """### 8th-Grade Exam
|
| 59 |
+
|
| 60 |
+
The **8th-grade exam** (*egzamin ósmoklasisty*) is a standardized assessment taken by Polish students at the end of their primary education (8th grade).
|
| 61 |
+
|
| 62 |
+
#### Subjects Covered
|
| 63 |
+
|
| 64 |
+
This exam mainly includes tests in:
|
| 65 |
+
|
| 66 |
+
- **Polish Language**
|
| 67 |
+
- **Mathematics**
|
| 68 |
+
- **Foreign Language**
|
| 69 |
+
|
| 70 |
+
#### Purpose and Role
|
| 71 |
+
|
| 72 |
+
- **Secondary Education Foundation**: Provides a foundation for secondary education placements, guiding students to the next stage of their academic journey.
|
| 73 |
+
- **Student Assessment**: Evaluates core competencies, ensuring readiness for high school-level studies.
|
| 74 |
+
"""
|
| 75 |
+
|
| 76 |
+
ZAW_DESC = """### Professional Exam
|
| 77 |
+
|
| 78 |
+
The **Professional Exam** (*egzamin zawodowy*) is tailored for students pursuing vocational education in Poland. This exam is typically taken after completing vocational training programs, which may range from:
|
| 79 |
+
|
| 80 |
+
- **Technical High Schools**
|
| 81 |
+
- **Post-secondary Vocational Courses**
|
| 82 |
+
|
| 83 |
+
#### Purpose and Importance
|
| 84 |
+
|
| 85 |
+
- **Certification**: A successful score on this exam certifies a student’s qualifications in a specific trade or profession.
|
| 86 |
+
- **Career Readiness**: Enables students to enter the workforce directly.
|
| 87 |
+
- **Further Education**: Offers a foundation for pursuing specialized training.
|
| 88 |
+
"""
|
| 89 |
+
|
| 90 |
+
ABOUT = "## ABOUTS"
|
src/envs.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
|
| 3 |
+
from huggingface_hub import HfApi
|
| 4 |
+
|
| 5 |
+
# Deployment-specific settings: adjust these for your own repository.
# -------------------------------------------------------------------
# Read/write Hugging Face token for the owning org (None when HF_TOKEN is unset).
TOKEN = os.getenv("HF_TOKEN")

# Owner of the repositories below. Change to your org, and don't forget to
# create a results and a requests dataset with the correct format.
OWNER = "MCiesiolka"
# -------------------------------------------------------------------

REPO_ID = f"{OWNER}/test_leaderboard"
QUEUE_REPO = f"{OWNER}/requests"
RESULTS_REPO = f"{OWNER}/results"

# Base directory for the local caches; set HF_HOME to relocate it.
CACHE_PATH = os.environ.get("HF_HOME", ".")

# Local cache directories, all located directly under CACHE_PATH.
(
    EVAL_REQUESTS_PATH,
    EVAL_RESULTS_PATH,
    EVAL_REQUESTS_PATH_BACKEND,
    EVAL_RESULTS_PATH_BACKEND,
) = (
    os.path.join(CACHE_PATH, folder)
    for folder in ("eval-queue", "eval-results", "eval-queue-bk", "eval-results-bk")
)

# Hub API client created with TOKEN.
API = HfApi(token=TOKEN)
|
src/images/logo.png
ADDED
|
src/structures/gim.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
from pathlib import Path
|
| 3 |
+
|
| 4 |
+
# Repository root (the directory that contains `src/`): three `.parent` steps
# up from this module's own path.
|
| 5 |
+
abs_path = Path(__file__).parent.parent.parent
|
| 6 |
+
|
| 7 |
+
def load_json_data(file_path):
    """Build the middle-school exam score table from the combined results JSON.

    The file is read with ``pd.read_json`` and transposed, so every top-level
    JSON key (a model identifier) becomes one row. Only the columns whose
    label contains ``"Egzaminy Gimnazjalne"`` are kept.

    Args:
        file_path: Location of the JSON file, in any form ``pd.read_json``
            accepts.

    Returns:
        A ``pandas.DataFrame`` whose first column, ``Model``, holds markdown
        links to huggingface.co and whose remaining columns hold the scores
        multiplied by 100 and rounded to two decimals (cells that cannot be
        parsed as numbers become NaN). The score columns are relabelled with
        their year part and ordered by sorting those labels as strings.
    """
    exam_label = "Egzaminy Gimnazjalne"

    # Transpose so that models are rows; reset_index() moves the model
    # identifiers out of the index into a regular column, renamed to 'Model'.
    scores = pd.read_json(file_path).T.reset_index()
    scores = scores.rename(columns={"index": "Model"})

    exam_columns = [col for col in scores.columns if exam_label in col]
    # Take an explicit copy: the column assignments below would otherwise
    # write into a selection of the transposed frame, which pandas reports
    # with SettingWithCopyWarning.
    scores = scores[["Model"] + exam_columns].copy()

    def as_hub_link(name):
        # "__" in the stored identifier is turned into "/" to form the path
        # used in the huggingface.co link.
        repo_id = name.replace("__", "/")
        return f"[{repo_id}](https://huggingface.co/{repo_id})"

    scores["Model"] = scores["Model"].apply(as_hub_link)

    # Scale the scores by 100 and round to two decimals; unparseable -> NaN.
    value_columns = scores.columns[1:]
    scaled = scores[value_columns].apply(pd.to_numeric, errors="coerce") * 100
    scores[value_columns] = scaled.round(2)

    # Keep only the year part of each score label: the first comma-separated
    # field with its leading character dropped.
    # NOTE(review): presumably labels look like "(2019, ...)" -- confirm against the data.
    scores.columns = [
        col if col == "Model" else col.split(",")[0][1:] for col in scores.columns
    ]

    # Order the year columns by their string labels, keeping 'Model' first.
    ordered_labels = sorted(scores.columns[1:].astype(str).tolist())
    return scores[["Model"] + ordered_labels]
|
| 36 |
+
|
| 37 |
+
# Load the middle-school score table once at import time and wrap it in a
# Styler that highlights the highest value of each score column.
file_path = str(abs_path / "leaderboards/all_types_years.json")
GIM_SCORES = load_json_data(file_path)
GIM_SCORES = GIM_SCORES.style.highlight_max(
    color="#ff7070",
    # Every column after 'Model' is a score column. Selecting them by position
    # from the front (instead of a fixed "last 18") keeps the text column out
    # of the highlight when the number of year columns changes.
    subset=GIM_SCORES.columns[1:],
).format(precision=2)
|
src/structures/leaderboard_structure.py
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
from pathlib import Path
|
| 3 |
+
|
| 4 |
+
# Repository root (the directory that contains `src/`): three `.parent` steps
# up from this module's own path.
abs_path = Path(__file__).parent.parent.parent
|
| 5 |
+
|
| 6 |
+
# Leaderboard columns in display order. Used to select and order the columns of
# the loaded frame and as the choices of the column picker in app.py.
ORDER_LIST = ["Name", "Lang", "Score", "Parameters (B)", "Date"]
|
| 7 |
+
# Header labels passed to the Gradio Dataframe in app.py, listed in the same
# order as ORDER_LIST ("Lang" is shown as "Language").
COLUMN_HEADERS = ["Name", "Language", "Score", "Parameters (B)", "Date"]
|
| 8 |
+
# Gradio datatype of each column, listed in the same order as ORDER_LIST.
DATA_TYPES = ["markdown", "str", "number", "number", "str"]
|
| 9 |
+
|
| 10 |
+
def filter_data(selected_columns, search_query):
    """Return the leaderboard limited to some columns and, optionally, to matching names.

    Args:
        selected_columns: Column labels to keep in the result.
        search_query: Text looked up in the ``Name`` column as a
            case-insensitive substring. A falsy value disables the search.

    Returns:
        A ``pandas.DataFrame`` with the requested columns and the rows whose
        ``Name`` contains ``search_query`` (all rows when no query is given).
    """
    table = LB_LLMZSZL
    if search_query:
        # regex=False: the query is user-typed text, so it is matched
        # literally; characters such as "(" or "+" would otherwise be
        # interpreted as a pattern and could raise re.error.
        # The match runs on the full table so that the search also works
        # when "Name" is not among the selected columns.
        matches = table["Name"].str.contains(
            search_query, case=False, na=False, regex=False
        )
        table = table[matches]
    return table[selected_columns]
|
| 15 |
+
|
| 16 |
+
def filter_row(language):
    """Return the leaderboard rows whose ``Lang`` value equals ``language``.

    A falsy ``language`` means "no filter": the whole leaderboard frame is
    returned unchanged.
    """
    if not language:
        return LB_LLMZSZL
    same_language = LB_LLMZSZL["Lang"] == language
    return LB_LLMZSZL[same_language]
|
| 20 |
+
|
| 21 |
+
def filter_columns(column_choices):
    """Return the leaderboard restricted to the chosen columns.

    The columns come back in ``ORDER_LIST`` order regardless of the order in
    ``column_choices``; choices that are not in ``ORDER_LIST`` are ignored.
    """
    kept = []
    for label in ORDER_LIST:
        if label in column_choices:
            kept.append(label)
    return LB_LLMZSZL[kept]
|
| 24 |
+
|
| 25 |
+
def load_json_data(file_path, order_list):
    """Load the overall leaderboard from a JSON file of records.

    Args:
        file_path: Location of the JSON file, in any form ``pd.read_json``
            accepts.
        order_list: Column labels to keep, in the desired order. Labels that
            are missing from the file are skipped.

    Returns:
        A ``pandas.DataFrame`` sorted by ``Score`` in descending order, with
        ``Name`` turned into a markdown link to huggingface.co and the
        ``Lang`` codes ``E``/``P``/``m`` spelled out as
        English/Polish/Multilingual (other codes are kept as they are).

    Raises:
        KeyError: If the file has no ``Name`` or ``Lang`` column, or if
            ``Score`` is not among the kept columns.
    """
    # 'Date' holds "YY-MM" labels, not timestamps. pandas treats a column
    # named "date" as date-like and may try to parse it, so automatic date
    # conversion is switched off to keep those labels as plain text.
    board = pd.read_json(file_path, convert_dates=False)

    # Stringify every column that contains nested JSON objects (dicts).
    for column in board.columns:
        if board[column].map(lambda cell: isinstance(cell, dict)).any():
            board[column] = board[column].apply(str)

    board["Name"] = board["Name"].apply(
        lambda name: f"[{name}](https://huggingface.co/{name})"
    )

    lang_names = {
        "E": "English",
        "P": "Polish",
        "m": "Multilingual",
    }
    # Unknown codes fall through unchanged.
    board["Lang"] = board["Lang"].apply(lambda code: lang_names.get(code, code))

    kept_columns = [col for col in order_list if col in board.columns]
    return board[kept_columns].sort_values(by="Score", ascending=False)
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
# Build the overall leaderboard once, at import time. The filter_* helpers in
# this module read the resulting module-level frame.
file_path = str(abs_path / "leaderboards/llmzszl.json")
|
| 53 |
+
LB_LLMZSZL = load_json_data(file_path, ORDER_LIST)
|
src/structures/mat.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
from pathlib import Path
|
| 3 |
+
|
| 4 |
+
# Repository root (the directory that contains `src/`): three `.parent` steps
# up from this module's own path.
|
| 5 |
+
abs_path = Path(__file__).parent.parent.parent
|
| 6 |
+
|
| 7 |
+
def load_json_data(file_path):
    """Build the per-year "Egzaminy Maturalne" score table from a JSON file.

    The JSON is read with ``pd.read_json`` and transposed so that each model
    becomes one row. Only columns whose label contains "Egzaminy Maturalne"
    are kept; their values are converted to numbers (anything non-numeric
    becomes NaN), multiplied by 100 and rounded to two decimals. Each kept
    label is then shortened to the text before its first comma, minus the
    leading character, and the shortened labels are sorted as strings.

    Args:
        file_path: Path (or file-like object) accepted by ``pd.read_json``.

    Returns:
        A ``pandas.DataFrame`` whose first column, ``"Model"``, holds Markdown
        links to ``https://huggingface.co/<model>``, followed by one column
        per shortened label in lexicographic order.
    """
    exam_type = "Egzaminy Maturalne"

    # Transpose so the models become rows, then expose the model name as an
    # ordinary column called "Model".
    scores = pd.read_json(file_path).T.reset_index()
    scores = scores.rename(columns={"index": "Model"})

    # Keep only this exam type's columns; non-string labels can never match.
    exam_columns = [
        col for col in scores.columns
        if isinstance(col, str) and exam_type in col
    ]
    # Copy the selection so the assignments below write to an independent
    # frame instead of a slice of the transposed data (avoids pandas'
    # chained-assignment warning).
    scores = scores[["Model"] + exam_columns].copy()

    def as_hub_link(name):
        # "__" in a stored name is turned back into "/" before linking.
        repo_id = name.replace("__", "/")
        return f"[{repo_id}](https://huggingface.co/{repo_id})"

    scores["Model"] = scores["Model"].apply(as_hub_link)

    if exam_columns:
        # Scale by 100 (presumably fraction -> percent; confirm against the
        # data) and round to two decimals.
        scaled = scores[exam_columns].apply(pd.to_numeric, errors="coerce") * 100
        scores[exam_columns] = scaled.round(2)

    # Labels presumably look like "(2015, 'Egzaminy Maturalne')"; the slice
    # keeps what sits between the first character and the first comma.
    year_labels = [col.split(",")[0][1:] for col in exam_columns]
    scores.columns = ["Model"] + year_labels
    return scores[["Model"] + sorted(year_labels)]
|
| 36 |
+
|
| 37 |
+
# Load the "Egzaminy Maturalne" table at import time and wrap it in a Styler.
file_path = str(abs_path / "leaderboards/all_types_years.json")
MAT_SCORES = load_json_data(file_path)
# Highlight the best value of every score column. Everything after "Model" is
# selected, rather than a fixed trailing count (previously the last 22), so the
# subset stays correct when the number of year columns changes: a fixed count
# either pulls in the "Model" column or leaves early years unhighlighted.
MAT_SCORES = MAT_SCORES.style.highlight_max(
    color='#ff7070',
    subset=MAT_SCORES.columns[1:],
).format(precision=2)
|
src/structures/osm.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
from pathlib import Path
|
| 3 |
+
|
| 4 |
+
# Base directory three levels above this module; data file paths are built from it
|
| 5 |
+
abs_path = Path(__file__).parent.parent.parent
|
| 6 |
+
|
| 7 |
+
def load_json_data(file_path):
    """Build the per-year "Egzaminy Ósmoklasisty" score table from a JSON file.

    The JSON is read with ``pd.read_json`` and transposed so that each model
    becomes one row. Only columns whose label contains
    "Egzaminy Ósmoklasisty" are kept; their values are converted to numbers
    (anything non-numeric becomes NaN), multiplied by 100 and rounded to two
    decimals. Each kept label is then shortened to the text before its first
    comma, minus the leading character, and the shortened labels are sorted
    as strings.

    Args:
        file_path: Path (or file-like object) accepted by ``pd.read_json``.

    Returns:
        A ``pandas.DataFrame`` whose first column, ``"Model"``, holds Markdown
        links to ``https://huggingface.co/<model>``, followed by one column
        per shortened label in lexicographic order.
    """
    exam_type = "Egzaminy Ósmoklasisty"

    # Transpose so the models become rows, then expose the model name as an
    # ordinary column called "Model".
    scores = pd.read_json(file_path).T.reset_index()
    scores = scores.rename(columns={"index": "Model"})

    # Keep only this exam type's columns; non-string labels can never match.
    exam_columns = [
        col for col in scores.columns
        if isinstance(col, str) and exam_type in col
    ]
    # Copy the selection so the assignments below write to an independent
    # frame instead of a slice of the transposed data (avoids pandas'
    # chained-assignment warning).
    scores = scores[["Model"] + exam_columns].copy()

    def as_hub_link(name):
        # "__" in a stored name is turned back into "/" before linking.
        repo_id = name.replace("__", "/")
        return f"[{repo_id}](https://huggingface.co/{repo_id})"

    scores["Model"] = scores["Model"].apply(as_hub_link)

    if exam_columns:
        # Scale by 100 (presumably fraction -> percent; confirm against the
        # data) and round to two decimals.
        scaled = scores[exam_columns].apply(pd.to_numeric, errors="coerce") * 100
        scores[exam_columns] = scaled.round(2)

    # Labels presumably look like "(2019, 'Egzaminy Ósmoklasisty')"; the slice
    # keeps what sits between the first character and the first comma.
    year_labels = [col.split(",")[0][1:] for col in exam_columns]
    scores.columns = ["Model"] + year_labels
    return scores[["Model"] + sorted(year_labels)]
|
| 36 |
+
|
| 37 |
+
# Load the "Egzaminy Ósmoklasisty" table at import time and wrap it in a Styler.
file_path = str(abs_path / "leaderboards/all_types_years.json")
OSM_SCORES = load_json_data(file_path)
# Highlight the best value of every score column. Everything after "Model" is
# selected, rather than a fixed trailing count (previously the last 5), so the
# subset stays correct when the number of year columns changes: a fixed count
# either pulls in the "Model" column or leaves early years unhighlighted.
OSM_SCORES = OSM_SCORES.style.highlight_max(
    color='#ff7070',
    subset=OSM_SCORES.columns[1:],
).format(precision=2)
|
src/structures/zaw.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
from pathlib import Path
|
| 3 |
+
|
| 4 |
+
# Base directory three levels above this module; data file paths are built from it
|
| 5 |
+
abs_path = Path(__file__).parent.parent.parent
|
| 6 |
+
|
| 7 |
+
def load_json_data(file_path):
    """Build the per-year "Egzaminy Zawodowe" score table from a JSON file.

    The JSON is read with ``pd.read_json`` and transposed so that each model
    becomes one row. Only columns whose label contains "Egzaminy Zawodowe"
    are kept; their values are converted to numbers (anything non-numeric
    becomes NaN), multiplied by 100 and rounded to two decimals. Each kept
    label is then shortened to the text before its first comma, minus the
    leading character, and the shortened labels are sorted as strings.

    Args:
        file_path: Path (or file-like object) accepted by ``pd.read_json``.

    Returns:
        A ``pandas.DataFrame`` whose first column, ``"Model"``, holds Markdown
        links to ``https://huggingface.co/<model>``, followed by one column
        per shortened label in lexicographic order.
    """
    exam_type = "Egzaminy Zawodowe"

    # Transpose so the models become rows, then expose the model name as an
    # ordinary column called "Model".
    scores = pd.read_json(file_path).T.reset_index()
    scores = scores.rename(columns={"index": "Model"})

    # Keep only this exam type's columns; non-string labels can never match.
    exam_columns = [
        col for col in scores.columns
        if isinstance(col, str) and exam_type in col
    ]
    # Copy the selection so the assignments below write to an independent
    # frame instead of a slice of the transposed data (avoids pandas'
    # chained-assignment warning).
    scores = scores[["Model"] + exam_columns].copy()

    def as_hub_link(name):
        # "__" in a stored name is turned back into "/" before linking.
        repo_id = name.replace("__", "/")
        return f"[{repo_id}](https://huggingface.co/{repo_id})"

    scores["Model"] = scores["Model"].apply(as_hub_link)

    if exam_columns:
        # Scale by 100 (presumably fraction -> percent; confirm against the
        # data) and round to two decimals.
        scaled = scores[exam_columns].apply(pd.to_numeric, errors="coerce") * 100
        scores[exam_columns] = scaled.round(2)

    # Labels presumably look like "(2015, 'Egzaminy Zawodowe')"; the slice
    # keeps what sits between the first character and the first comma.
    year_labels = [col.split(",")[0][1:] for col in exam_columns]
    scores.columns = ["Model"] + year_labels
    return scores[["Model"] + sorted(year_labels)]
|
| 36 |
+
|
| 37 |
+
# Load the "Egzaminy Zawodowe" table at import time and wrap it in a Styler.
file_path = str(abs_path / "leaderboards/all_types_years.json")
ZAW_SCORES = load_json_data(file_path)
# Highlight the best value of every score column. Everything after "Model" is
# selected, rather than a fixed trailing count (previously the last 12), so the
# subset stays correct when the number of year columns changes: a fixed count
# either pulls in the "Model" column or leaves early years unhighlighted.
ZAW_SCORES = ZAW_SCORES.style.highlight_max(
    color='#ff7070',
    subset=ZAW_SCORES.columns[1:],
).format(precision=2)
|
src/styles.py
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
custom_css = """
|
| 2 |
+
.markdown-text {
|
| 3 |
+
font-size: 16px !important;
|
| 4 |
+
}
|
| 5 |
+
#models-to-add-text {
|
| 6 |
+
font-size: 18px !important;
|
| 7 |
+
}
|
| 8 |
+
#citation-button span {
|
| 9 |
+
font-size: 16px !important;
|
| 10 |
+
}
|
| 11 |
+
#citation-button textarea {
|
| 12 |
+
font-size: 16px !important;
|
| 13 |
+
}
|
| 14 |
+
#citation-button > label > button {
|
| 15 |
+
margin: 6px;
|
| 16 |
+
transform: scale(1.3);
|
| 17 |
+
}
|
| 18 |
+
#leaderboard-table {
|
| 19 |
+
margin-top: 15px
|
| 20 |
+
}
|
| 21 |
+
#leaderboard-table-lite {
|
| 22 |
+
margin-top: 15px
|
| 23 |
+
}
|
| 24 |
+
#search-bar-table-box > div:first-child {
|
| 25 |
+
background: none;
|
| 26 |
+
border: none;
|
| 27 |
+
}
|
| 28 |
+
|
| 29 |
+
#search-bar {
|
| 30 |
+
padding: 0px;
|
| 31 |
+
}
|
| 32 |
+
/* Hides the final AutoEvalColumn */
|
| 33 |
+
#llm-benchmark-tab-table table td:last-child,
|
| 34 |
+
#llm-benchmark-tab-table table th:last-child {
|
| 35 |
+
display: none;
|
| 36 |
+
}
|
| 37 |
+
/* Limit the width of the first AutoEvalColumn so that names don't expand too much */
|
| 38 |
+
table td:first-child,
|
| 39 |
+
table th:first-child {
|
| 40 |
+
max-width: 400px;
|
| 41 |
+
overflow: auto;
|
| 42 |
+
white-space: nowrap;
|
| 43 |
+
}
|
| 44 |
+
table > tbody > tr > td:nth-child(3) > div {
|
| 45 |
+
overflow-x: auto;
|
| 46 |
+
width: 450px;
|
| 47 |
+
}
|
| 48 |
+
tbody span {
|
| 49 |
+
-webkit-user-select: text;
|
| 50 |
+
-moz-user-select: text;
|
| 51 |
+
-ms-user-select: text;
|
| 52 |
+
user-select: text;
|
| 53 |
+
}
|
| 54 |
+
.tab-buttons button {
|
| 55 |
+
font-size: 20px;
|
| 56 |
+
}
|
| 57 |
+
#scale-logo {
|
| 58 |
+
border-style: none !important;
|
| 59 |
+
box-shadow: none;
|
| 60 |
+
display: block;
|
| 61 |
+
margin-left: auto;
|
| 62 |
+
margin-right: auto;
|
| 63 |
+
max-width: 600px;
|
| 64 |
+
}
|
| 65 |
+
#scale-logo .download {
|
| 66 |
+
display: none;
|
| 67 |
+
}
|
| 68 |
+
#filter_type{
|
| 69 |
+
border: 0;
|
| 70 |
+
padding-left: 0;
|
| 71 |
+
padding-top: 0;
|
| 72 |
+
}
|
| 73 |
+
#filter_type label {
|
| 74 |
+
display: flex;
|
| 75 |
+
}
|
| 76 |
+
#filter_type label > span{
|
| 77 |
+
margin-top: var(--spacing-lg);
|
| 78 |
+
margin-right: 0.5em;
|
| 79 |
+
}
|
| 80 |
+
#filter_type label > .wrap{
|
| 81 |
+
width: 103px;
|
| 82 |
+
}
|
| 83 |
+
#filter_type label > .wrap .wrap-inner{
|
| 84 |
+
padding: 2px;
|
| 85 |
+
}
|
| 86 |
+
#filter_type label > .wrap .wrap-inner input{
|
| 87 |
+
width: 1px
|
| 88 |
+
}
|
| 89 |
+
#filter-columns-type{
|
| 90 |
+
border:0;
|
| 91 |
+
padding:0.5;
|
| 92 |
+
}
|
| 93 |
+
#filter-columns-size{
|
| 94 |
+
border:0;
|
| 95 |
+
padding:0.5;
|
| 96 |
+
}
|
| 97 |
+
#box-filter > .form{
|
| 98 |
+
border: 0
|
| 99 |
+
}
|
| 100 |
+
"""
|
| 101 |
+
|
| 102 |
+
get_window_url_params = """
|
| 103 |
+
function(url_params) {
|
| 104 |
+
const params = new URLSearchParams(window.location.search);
|
| 105 |
+
url_params = Object.fromEntries(params);
|
| 106 |
+
return url_params;
|
| 107 |
+
}
|
| 108 |
+
"""
|
src/utils.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
|