Spaces:

amu-cai
/

LLMZSZL_Leaderboard

Runtime error

App Files Files Community

michal committed on Nov 7, 2024

Commit

c5afbf5

1 Parent(s): 64298d7

Upload

Browse files

Files changed (13) hide show

app.py +120 -0
leaderboards/all_types_years.json +0 -0
leaderboards/llmzszl.json +410 -0
src/abouts.py +90 -0
src/envs.py +25 -0
src/images/logo.png +0 -0
src/structures/gim.py +42 -0
src/structures/leaderboard_structure.py +53 -0
src/structures/mat.py +42 -0
src/structures/osm.py +42 -0
src/structures/zaw.py +42 -0
src/styles.py +108 -0
src/utils.py +1 -0

app.py ADDED Viewed

	@@ -0,0 +1,120 @@

+import gradio as gr
+from gradio_leaderboard import Leaderboard
+from pathlib import Path
+import pandas as pd
+from src.styles import custom_css
+from src.structures.leaderboard_structure import (LB_LLMZSZL,
+    ORDER_LIST,
+    DATA_TYPES,
+    COLUMN_HEADERS,
+    filter_data,
+    filter_columns,
+)
+from src.structures.gim import GIM_SCORES
+from src.structures.zaw import ZAW_SCORES
+from src.structures.mat import MAT_SCORES
+from src.structures.osm import OSM_SCORES
+# NOTE: a `global` statement at module scope has no effect; data_component is
+# already a module-level name wherever it is assigned below.
+global data_component
+# Star import: supplies HEADER_TITLE, GIM_DESC, OSM_DESC, MAT_DESC, ZAW_DESC and ABOUT used below.
+from src.abouts import *
+# Root Gradio container; styling comes from the custom_css string in src.styles.
+main = gr.Blocks(css=custom_css)
+with main:
+    # Header row: logo on the left, HTML title block on the right.
+    with gr.Row():
+        with gr.Column():
+            image = gr.Image("src/images/logo.png",
+                show_download_button=False,
+                show_share_button=False,
+                show_fullscreen_button=False,
+                container=False)
+        with gr.Column():
+            gr.HTML(HEADER_TITLE)
+    with gr.Tabs(elem_classes="tab-buttons") as tabs:
+        # Tab 1: overall leaderboard with a column-visibility selector.
+        with gr.Tab("🏅 LLMZSZL"):
+            gr.Markdown("""## Overall scores""")
+            # Checkbox to toggle column visibility
+            columns_selector = gr.CheckboxGroup(
+                choices=ORDER_LIST,
+                label="Select columns to display",
+                value=ORDER_LIST,
+            )
+            # Dataframe component to display the leaderboard data
+            data_component = gr.components.Dataframe(
+                value=LB_LLMZSZL,
+                headers=COLUMN_HEADERS,
+                type="pandas",
+                datatype=DATA_TYPES,
+                interactive=False,
+                visible=True,
+                column_widths=[400, 200, 100, 120, 100]
+            )
+            # def update_data(selected_columns, selected_languages):
+            #     return update_dataframe(selected_columns, selected_languages)
+            # Callback: re-slice the leaderboard to the ticked columns (kept in ORDER_LIST order).
+            def update_dataframe(selected_columns):
+                return filter_columns(selected_columns)
+            # Wired here, while data_component still refers to the overall table;
+            # the name is rebound to other tables in the tabs below.
+            columns_selector.change(update_dataframe, inputs=columns_selector, outputs=data_component)
+            # language_selector.change(update_data, inputs=[columns_selector, language_selector], outputs=data_component)
+        # Tabs 2-5: one static table per exam type. Each value is the pandas
+        # Styler built in the matching src/structures module; the hard-coded
+        # counts (18, 5, 22, 12) mirror the `columns[-N:]` slices used there.
+        with gr.Tab("📝 Middle School exam"):
+            gr.Markdown(GIM_DESC)
+            data_component = gr.components.Dataframe(
+                value=GIM_SCORES,
+                type="pandas",
+                interactive=False,
+                visible=True,
+                datatype=["markdown"]+["number"]*18,
+                column_widths=[400] + [80] * 18
+            )
+        with gr.Tab("📝 8-grade exam"):
+            gr.Markdown(OSM_DESC)
+            data_component = gr.components.Dataframe(
+                value=OSM_SCORES,
+                type="pandas",
+                interactive=False,
+                visible=True,
+                datatype=["markdown"]+["number"]*5,
+                column_widths=[400] + [80] * 5
+            )
+        with gr.Tab("📝 High School exam"):
+            gr.Markdown(MAT_DESC)
+            data_component = gr.components.Dataframe(
+                value=MAT_SCORES,
+                type="pandas",
+                interactive=False,
+                visible=True,
+                datatype=["markdown"]+["number"]*22,
+                column_widths=[400] + [80] * 22
+            )
+        with gr.Tab("📝 Professional exam"):
+            gr.Markdown(ZAW_DESC)
+            data_component = gr.components.Dataframe(
+                value=ZAW_SCORES,
+                type="pandas",
+                interactive=False,
+                visible=True,
+                datatype=["markdown"]+["number"]*12,
+                column_widths=[400] + [80] * 12
+            )
+        with gr.Tab("📝 About"):
+            gr.Markdown(ABOUT)
+        # NOTE(review): this Column sits directly inside gr.Tabs (not inside a Tab),
+        # and the citation text/label are still the "TEST" placeholders.
+        with gr.Column():
+                with gr.Accordion("📙 Citation", open=False):
+                    citation_button = gr.Textbox(
+                        value="TEST",
+                        label="TEST_LABEL",
+                        lines=20,
+                        elem_id="citation-button",
+                        show_copy_button=True,
+                    )
+# Launch the app only when run as a script, not when imported.
+if __name__ == "__main__":
+    main.launch()

leaderboards/all_types_years.json ADDED Viewed

The diff for this file is too large to render. See raw diff

leaderboards/llmzszl.json ADDED Viewed

	@@ -0,0 +1,410 @@

+[
+  {
+    "Lang": "E",
+    "Family": "Llama",
+    "Name": "meta-llama/Llama-2-7b-hf",
+    "Parameters (B)": 7,
+    "Date": "23-07",
+    "Score": 28.04
+  },
+  {
+    "Lang": "E",
+    "Family": "Llama",
+    "Name": "meta-llama/Llama-2-13b-hf",
+    "Parameters (B)": 13,
+    "Date": "23-07",
+    "Score": 33.85
+  },
+  {
+    "Lang": "E",
+    "Family": "Llama",
+    "Name": "meta-llama/Llama-2-70b-hf",
+    "Parameters (B)": 70,
+    "Date": "23-07",
+    "Score": 46.38
+  },
+  {
+    "Lang": "E",
+    "Family": "Phi",
+    "Name": "microsoft/phi-1",
+    "Parameters (B)": 1,
+    "Date": "24-04",
+    "Score": 25.73
+  },
+  {
+    "Lang": "E",
+    "Family": "Phi",
+    "Name": "microsoft/phi-1_5",
+    "Parameters (B)": 1,
+    "Date": "24-04",
+    "Score": 24.25
+  },
+  {
+    "Lang": "E",
+    "Family": "Phi",
+    "Name": "microsoft/phi-2",
+    "Parameters (B)": 3,
+    "Date": "24-01",
+    "Score": 25.6
+  },
+  {
+    "Lang": "E",
+    "Family": "Phi",
+    "Name": "microsoft/Phi-3-mini-4k-instruct",
+    "Parameters (B)": 4,
+    "Date": "24-07",
+    "Score": 33.44
+  },
+  {
+    "Lang": "E",
+    "Family": "Qwen",
+    "Name": "Qwen/Qwen2-1.5B",
+    "Parameters (B)": 1.5,
+    "Date": "24-05",
+    "Score": 34.19
+  },
+  {
+    "Lang": "E",
+    "Family": "Qwen",
+    "Name": "Qwen/Qwen2-7B",
+    "Parameters (B)": 7,
+    "Date": "24-06",
+    "Score": 45.59
+  },
+  {
+    "Lang": "E",
+    "Family": "gemma",
+    "Name": "google/gemma-7b",
+    "Parameters (B)": 7,
+    "Date": "24-02",
+    "Score": 46.84
+  },
+  {
+    "Lang": "P",
+    "Family": "Bielik",
+    "Name": "speakleash/Bielik-7B-v0.1",
+    "Parameters (B)": 7,
+    "Date": "24-03",
+    "Score": 39.15
+  },
+  {
+    "Lang": "P",
+    "Family": "Bielik",
+    "Name": "speakleash/Bielik-7B-Instruct-v0.1",
+    "Parameters (B)": 7,
+    "Date": "24-03",
+    "Score": 40.77
+  },
+  {
+    "Lang": "P",
+    "Family": "Bielik",
+    "Name": "speakleash/Bielik-11B-v2",
+    "Parameters (B)": 11,
+    "Date": "24-08",
+    "Score": 55.14
+  },
+  {
+    "Lang": "P",
+    "Family": "Bielik",
+    "Name": "speakleash/Bielik-11B-v2.0-Instruct",
+    "Parameters (B)": 11,
+    "Date": "24-08",
+    "Score": 55.61
+  },
+  {
+    "Lang": "P",
+    "Family": "Bielik",
+    "Name": "speakleash/Bielik-11B-v2.1-Instruct",
+    "Parameters (B)": 11,
+    "Date": "24-08",
+    "Score": 57.52
+  },
+  {
+    "Lang": "P",
+    "Family": "Bielik",
+    "Name": "speakleash/Bielik-11B-v2.2-Instruct",
+    "Parameters (B)": 11,
+    "Date": "24-08",
+    "Score": 57.36
+  },
+  {
+    "Lang": "P",
+    "Family": "Qra",
+    "Name": "OPI-PG/Qra-1b",
+    "Parameters (B)": 1,
+    "Date": "24-02",
+    "Score": 25.47
+  },
+  {
+    "Lang": "P",
+    "Family": "Qra",
+    "Name": "OPI-PG/Qra-7b",
+    "Parameters (B)": 7,
+    "Date": "24-02",
+    "Score": 29.07
+  },
+  {
+    "Lang": "P",
+    "Family": "Qra",
+    "Name": "OPI-PG/Qra-13b",
+    "Parameters (B)": 13,
+    "Date": "24-02",
+    "Score": 34.85
+  },
+  {
+    "Lang": "P",
+    "Family": "polish-gpt2",
+    "Name": "sdadas/polish-gpt2-small",
+    "Parameters (B)": 0.2,
+    "Date": "22-09",
+    "Score": 24.19
+  },
+  {
+    "Lang": "P",
+    "Family": "polish-gpt2",
+    "Name": "sdadas/polish-gpt2-medium",
+    "Parameters (B)": 0.5,
+    "Date": "22-09",
+    "Score": 24.4
+  },
+  {
+    "Lang": "P",
+    "Family": "polish-gpt2",
+    "Name": "sdadas/polish-gpt2-large",
+    "Parameters (B)": 0.9,
+    "Date": "23-01",
+    "Score": 24.89
+  },
+  {
+    "Lang": "P",
+    "Family": "polish-gpt2",
+    "Name": "sdadas/polish-gpt2-xl",
+    "Parameters (B)": 2,
+    "Date": "23-01",
+    "Score": 23.98
+  },
+  {
+    "Lang": "P",
+    "Family": "trurl",
+    "Name": "Voicelab/trurl-2-7b-8bit",
+    "Parameters (B)": 7,
+    "Date": "23-08",
+    "Score": 31.86
+  },
+  {
+    "Lang": "P",
+    "Family": "trurl",
+    "Name": "Voicelab/trurl-2-7b",
+    "Parameters (B)": 7,
+    "Date": "23-08",
+    "Score": 32.3
+  },
+  {
+    "Lang": "P",
+    "Family": "trurl",
+    "Name": "Voicelab/trurl-2-13b",
+    "Parameters (B)": 13,
+    "Date": "23-08",
+    "Score": 40.22
+  },
+  {
+    "Lang": "P",
+    "Family": "trurl",
+    "Name": "Voicelab/trurl-2-13b-8bit",
+    "Parameters (B)": 13,
+    "Date": "23-08",
+    "Score": 40.23
+  },
+  {
+    "Lang": "P",
+    "Family": "trurl",
+    "Name": "Voicelab/trurl-2-13b-academic",
+    "Parameters (B)": 13,
+    "Date": "23-08",
+    "Score": 34.89
+  },
+  {
+    "Lang": "m",
+    "Family": "Llama",
+    "Name": "meta-llama/Meta-Llama-3-8B",
+    "Parameters (B)": 8,
+    "Date": "24-04",
+    "Score": 41.38
+  },
+  {
+    "Lang": "m",
+    "Family": "Llama",
+    "Name": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "Parameters (B)": 8,
+    "Date": "24-04",
+    "Score": 44.83
+  },
+  {
+    "Lang": "m",
+    "Family": "Llama",
+    "Name": "meta-llama/Meta-Llama-3-70B",
+    "Parameters (B)": 70,
+    "Date": "24-04",
+    "Score": 62.22
+  },
+  {
+    "Lang": "m",
+    "Family": "Llama",
+    "Name": "meta-llama/Meta-Llama-3-70B-Instruct",
+    "Parameters (B)": 70,
+    "Date": "24-04",
+    "Score": 64.04
+  },
+  {
+    "Lang": "m",
+    "Family": "Llama",
+    "Name": "meta-llama/Meta-Llama-3.1-8B",
+    "Parameters (B)": 8,
+    "Date": "24-07",
+    "Score": 44.21
+  },
+  {
+    "Lang": "m",
+    "Family": "Llama",
+    "Name": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "Parameters (B)": 8,
+    "Date": "24-07",
+    "Score": 47.41
+  },
+  {
+    "Lang": "m",
+    "Family": "Llama",
+    "Name": "meta-llama/Meta-Llama-3.1-70B-Instruct",
+    "Parameters (B)": 70,
+    "Date": "24-07",
+    "Score": 66.59
+  },
+  {
+    "Lang": "m",
+    "Family": "Mistral",
+    "Name": "mistralai/Mistral-7B-v0.1",
+    "Parameters (B)": 7,
+    "Date": "23-12",
+    "Score": 37.75
+  },
+  {
+    "Lang": "m",
+    "Family": "Mistral",
+    "Name": "mistralai/Mixtral-8x7B-Instruct-v0.1",
+    "Parameters (B)": 7,
+    "Date": "23-12",
+    "Score": 49.46
+  },
+  {
+    "Lang": "m",
+    "Family": "Mistral",
+    "Name": "mistralai/Mixtral-8x22B-Instruct-v0.1",
+    "Parameters (B)": 141,
+    "Date": "24-04",
+    "Score": 58.17
+  },
+  {
+    "Lang": "m",
+    "Family": "Mistral",
+    "Name": "mistralai/Mistral-7B-Instruct-v0.1",
+    "Parameters (B)": 7,
+    "Date": "23-12",
+    "Score": 35.98
+  },
+  {
+    "Lang": "m",
+    "Family": "Mistral",
+    "Name": "mistralai/Mistral-7B-Instruct-v0.2",
+    "Parameters (B)": 7,
+    "Date": "23-12",
+    "Score": 40.75
+  },
+  {
+    "Lang": "m",
+    "Family": "Mistral",
+    "Name": "mistralai/Mistral-7B-v0.3",
+    "Parameters (B)": 7,
+    "Date": "24-05",
+    "Score": 37.08
+  },
+  {
+    "Lang": "m",
+    "Family": "Mistral",
+    "Name": "mistralai/Mistral-7B-Instruct-v0.3",
+    "Parameters (B)": 7,
+    "Date": "24-05",
+    "Score": 41.72
+  },
+  {
+    "Lang": "m",
+    "Family": "Mistral",
+    "Name": "mistralai/Mistral-Large-Instruct-2407",
+    "Parameters (B)": 123,
+    "Date": "24-07",
+    "Score": 67.17
+  },
+  {
+    "Lang": "m",
+    "Family": "WizardLM",
+    "Name": "lucyknada/microsoft_WizardLM-2-7B",
+    "Parameters (B)": 7,
+    "Date": "24-04",
+    "Score": 38.23
+  },
+  {
+    "Lang": "m",
+    "Family": "Yi",
+    "Name": "01-ai/Yi-34B-Chat-4bits",
+    "Parameters (B)": 34,
+    "Date": "23-11",
+    "Score": 40.28
+  },
+  {
+    "Lang": "m",
+    "Family": "Yi",
+    "Name": "01-ai/Yi-34B-Chat",
+    "Parameters (B)": 34,
+    "Date": "23-11",
+    "Score": 41.42
+  },
+  {
+    "Lang": "m",
+    "Family": "Yi",
+    "Name": "01-ai/Yi-34B-200K",
+    "Parameters (B)": 34,
+    "Date": "24-03",
+    "Score": 37.56
+  },
+  {
+    "Lang": "m",
+    "Family": "Yi",
+    "Name": "01-ai/Yi-1.5-9B",
+    "Parameters (B)": 9,
+    "Date": "24-05",
+    "Score": 37.06
+  },
+  {
+    "Lang": "m",
+    "Family": "Yi",
+    "Name": "01-ai/Yi-1.5-9B-Chat",
+    "Parameters (B)": 9,
+    "Date": "24-05",
+    "Score": 37.59
+  },
+  {
+    "Lang": "m",
+    "Family": "Yi",
+    "Name": "01-ai/Yi-1.5-34B",
+    "Parameters (B)": 34,
+    "Date": "24-05",
+    "Score": 41.14
+  },
+  {
+    "Lang": "m",
+    "Family": "Yi",
+    "Name": "01-ai/Yi-1.5-34B-Chat",
+    "Parameters (B)": 34,
+    "Date": "24-05",
+    "Score": 41.47
+  }
+]

src/abouts.py ADDED Viewed

	@@ -0,0 +1,90 @@

+# Text constants for the leaderboard UI; app.py pulls them in with a star import.
+# HTML header (institution name + benchmark title), rendered via gr.HTML.
+HEADER_TITLE = """
+<div style="display: flex; flex-wrap: wrap; justify-content: space-around; align-items: center;">
+    <div>
+        <h1 align="center" id="space-title">Adam Mickiewicz University's Center for Artificial Intelligence</h1>
+        <h2 align="center" id="space-subtitle">LLMzSzŁ: a comprehensive LLM benchmark for Polish</h2>
+    </div>
+</div>
+"""
+# Heading for the overall table. NOTE(review): app.py inlines the same text
+# instead of using this constant.
+MAIN_DESC = """## Overall scores"""
+# Markdown description shown on the "Middle School exam" tab.
+GIM_DESC = """### Middle School Exam
+The **Middle School Exam** (*egzamin gimnazjalny*) was formerly a key component of Poland’s secondary education system. This exam was taken by students completing their lower secondary education.
+#### Subjects Covered
+This exam assessed students in core subjects like:
+- **Polish Language**
+- **Mathematics**
+- **Science**
+- **Foreign Language**
+preparing them for higher levels of education.
+#### Recent Changes
+Due to recent educational reforms in Poland:
+- **Middle Schools Phased Out**: Middle schools have been removed from the education system.
+- **Exam Discontinued**: With this change, the Middle School Exam has also been discontinued.
+"""
+# Markdown description shown on the "High School exam" tab.
+MAT_DESC = """### High School Exam
+The **High School Exam** (*matura*) is one of the most critical exams in Poland. This exam serves as a prerequisite for higher education and is a significant milestone in students' academic journeys.
+#### Mandatory Subjects
+The *matura* exam includes essential subjects:
+- **Polish Language**
+- **Mathematics**
+- **Foreign Language**
+#### Additional Subjects
+Students may also choose additional subjects based on:
+- **Areas of Interest**: Students select subjects aligned with their strengths or future studies.
+- **University Requirements**: Specific subjects may be necessary for admission to certain university programs.
+#### Minimum points required
+To pass the High School Exam, students needed to achieve a minimum score of **30%** in mandatory subjects. Additional subjects do not require a minimum score to pass, but the results significantly impact the points needed for university admission.
+"""
+# Markdown description shown on the "8-grade exam" tab.
+OSM_DESC = """### 8th-Grade Exam
+The **8th-grade exam** (*egzamin ósmoklasisty*) is a standardized assessment taken by Polish students at the end of their primary education (8th grade).
+#### Subjects Covered
+This exam mainly includes tests in:
+- **Polish Language**
+- **Mathematics**
+- **Foreign Language**
+#### Purpose and Role
+- **Secondary Education Foundation**: Provides a foundation for secondary education placements, guiding students to the next stage of their academic journey.
+- **Student Assessment**: Evaluates core competencies, ensuring readiness for high school-level studies.
+"""
+# Markdown description shown on the "Professional exam" tab.
+ZAW_DESC = """### Professional Exam
+The **Professional Exam** (*egzamin zawodowy*) is tailored for students pursuing vocational education in Poland. This exam is typically taken after completing vocational training programs, which may range from:
+- **Technical High Schools**
+- **Post-secondary Vocational Courses**
+#### Purpose and Importance
+- **Certification**: A successful score on this exam certifies a student’s qualifications in a specific trade or profession.
+- **Career Readiness**: Enables students to enter the workforce directly.
+- **Further Education**: Offers a foundation for pursuing specialized training.
+"""
+# Content of the "About" tab. NOTE(review): still a placeholder heading.
+ABOUT = "## ABOUTS"

src/envs.py ADDED Viewed

	@@ -0,0 +1,25 @@

+import os
+from huggingface_hub import HfApi
+# Hub configuration. NOTE(review): none of these names are imported by the
+# app.py shown in this commit — confirm whether this module is still needed.
+# Info to change for your repository
+# ----------------------------------
+# os.environ.get returns None when HF_TOKEN is not set.
+TOKEN = os.environ.get("HF_TOKEN") # A read/write token for your org
+OWNER = "MCiesiolka" # Change to your org - don't forget to create a results and request dataset, with the correct format!
+# ----------------------------------
+# Repository ids derived from OWNER.
+REPO_ID = f"{OWNER}/test_leaderboard"
+QUEUE_REPO = f"{OWNER}/requests"
+RESULTS_REPO = f"{OWNER}/results"
+# If you setup a cache later, just change HF_HOME
+# Falls back to the current working directory when HF_HOME is unset.
+CACHE_PATH=os.getenv("HF_HOME", ".")
+# Local caches
+EVAL_REQUESTS_PATH = os.path.join(CACHE_PATH, "eval-queue")
+EVAL_RESULTS_PATH = os.path.join(CACHE_PATH, "eval-results")
+EVAL_REQUESTS_PATH_BACKEND = os.path.join(CACHE_PATH, "eval-queue-bk")
+EVAL_RESULTS_PATH_BACKEND = os.path.join(CACHE_PATH, "eval-results-bk")
+# Client object is constructed at import time with the token read above.
+API = HfApi(token=TOKEN)

src/images/logo.png ADDED Viewed

src/structures/gim.py ADDED Viewed

	@@ -0,0 +1,42 @@

+import pandas as pd
+from pathlib import Path
+# Define the absolute path to the file
+# Three levels up from src/structures/gim.py, i.e. the directory that holds `leaderboards/`.
+abs_path = Path(__file__).parent.parent.parent
+def load_json_data(file_path):
+    """Build the Middle School exam score table from the combined results JSON.
+
+    Reads ``file_path`` with ``pd.read_json``, transposes it so the former
+    column labels become a ``Model`` column, keeps only the columns whose
+    label contains "Egzaminy Gimnazjalne", turns each model id into a
+    markdown link to huggingface.co, multiplies the scores by 100 (rounded
+    to 2 decimals) and returns the frame with its score columns sorted by
+    their shortened label.
+    """
+    # Load the JSON data
+    GIM_SCORES = pd.read_json(file_path)
+    # Transpose, then reset the index so the model names become a regular column
+    GIM_SCORES = GIM_SCORES.T.reset_index()
+    # Rename the first column as 'Model' to keep model names visible
+    GIM_SCORES.rename(columns={'index': 'Model'}, inplace=True)
+    # Filter columns that contain 'Egzaminy Gimnazjalne' in the name
+    filtered_columns = ['Model'] + [col for col in GIM_SCORES.columns if "Egzaminy Gimnazjalne" in col]
+    GIM_SCORES = GIM_SCORES[filtered_columns]
+    # Model ids use '__' in place of '/'; restore the slash and link to the Hub page.
+    # NOTE(review): assigning into a column-subset frame may emit pandas'
+    # SettingWithCopyWarning — consider taking an explicit .copy() above.
+    GIM_SCORES["Model"] = GIM_SCORES["Model"].apply(
+        lambda name: f"[{name.replace('__','/')}](https://huggingface.co/{name.replace('__','/')})"
+    )
+    # Coerce to numeric (non-numeric -> NaN), scale by 100, round to 2 decimal places
+    numeric_columns = GIM_SCORES.columns[1:]  # Get all year columns
+    GIM_SCORES[numeric_columns] = GIM_SCORES[numeric_columns].apply(pd.to_numeric, errors='coerce') * 100
+    GIM_SCORES[numeric_columns] = GIM_SCORES[numeric_columns].round(2)
+    # Convert year part in column names to strings for Gradio compatibility
+    # Keeps the text before the first comma minus its first character —
+    # presumably labels look like "(<year>, '<exam type>')"; confirm against the JSON.
+    GIM_SCORES.columns = [col.split(',')[0][1:] if col != 'Model' else col for col in GIM_SCORES.columns]
+    year_columns = GIM_SCORES.columns[1:]
+    sorted_year_columns = sorted(year_columns.astype(str).tolist())  # Sort the year columns as strings
+    sorted_columns = ['Model'] + sorted_year_columns
+    GIM_SCORES = GIM_SCORES[sorted_columns]
+    return GIM_SCORES
+# Define file path
+# Runs at import time: loads the table, then replaces it with a pandas Styler.
+file_path = str(abs_path / "leaderboards/all_types_years.json")
+GIM_SCORES = load_json_data(file_path)
+# Highlight the maximum in each of the last 18 columns and format values with
+# precision 2. The hard-coded 18 must match the number of score columns
+# (app.py declares 18 "number" columns for this tab).
+GIM_SCORES = GIM_SCORES.style.highlight_max(
+    color = '#ff7070',
+    subset=GIM_SCORES.columns[-18:]).format(precision=2)

src/structures/leaderboard_structure.py ADDED Viewed

	@@ -0,0 +1,53 @@

+import pandas as pd
+from pathlib import Path
+# Three levels up from src/structures/leaderboard_structure.py, i.e. the directory holding `leaderboards/`.
+abs_path = Path(__file__).parent.parent.parent
+# Column keys (as they appear in llmzszl.json) in display order.
+ORDER_LIST = ["Name", "Lang", "Score", "Parameters (B)", "Date"]
+# Display headers, parallel to ORDER_LIST ("Lang" is shown as "Language").
+COLUMN_HEADERS = ["Name", "Language", "Score", "Parameters (B)", "Date"]
+# Gradio datatypes, parallel to ORDER_LIST; "Name" is markdown because it holds a link.
+DATA_TYPES = ["markdown", "str", "number", "number", "str"]
+def filter_data(selected_columns, search_query):
+    """Return the selected leaderboard columns, optionally filtered by model name.
+
+    Reads the module-level ``LB_LLMZSZL``. When ``search_query`` is truthy, only
+    rows whose ``Name`` contains it (case-insensitive, missing values excluded)
+    are kept; ``Name`` must therefore be among ``selected_columns``.
+    """
+    df = LB_LLMZSZL[selected_columns]
+    if search_query:
+        # str.contains treats the query as a regular expression by default.
+        df = df[df['Name'].str.contains(search_query, case=False, na=False)]
+    return df
+def filter_row(language):
+    """Return leaderboard rows whose ``Lang`` equals ``language``, or every row if it is falsy.
+
+    ``Lang`` holds the expanded names written by ``load_json_data``
+    ('English', 'Polish', 'Multilingual'), not the one-letter codes from the JSON.
+    """
+    if language:
+        return LB_LLMZSZL[LB_LLMZSZL["Lang"] == language]
+    return LB_LLMZSZL
+def filter_columns(column_choices):
+    """Return the leaderboard restricted to ``column_choices``, kept in ORDER_LIST order.
+
+    Choices not present in ORDER_LIST are ignored. The returned frame keeps the
+    raw column labels (e.g. "Lang"), not the COLUMN_HEADERS display names.
+    """
+    selected_columns = [col for col in ORDER_LIST if col in column_choices]
+    return LB_LLMZSZL[selected_columns]
+def load_json_data(file_path, order_list):
+    """Load the overall leaderboard JSON into a display-ready DataFrame.
+
+    Steps: read ``file_path`` with ``pd.read_json``; stringify any column that
+    contains dict values; wrap each ``Name`` in a markdown link to
+    huggingface.co; expand the one-letter ``Lang`` codes; keep only the columns
+    listed in ``order_list`` (in that order); sort by ``Score`` descending.
+    """
+    # Local name shadows the module-level LB_LLMZSZL inside this function.
+    LB_LLMZSZL = pd.read_json(file_path)
+    for column in LB_LLMZSZL.columns:
+        # Convert the whole column to strings if any cell holds a dict.
+        if LB_LLMZSZL[column].apply(type).eq(dict).any():
+            LB_LLMZSZL[column] = LB_LLMZSZL[column].apply(str)
+    LB_LLMZSZL["Name"] = LB_LLMZSZL["Name"].apply(
+        lambda name: f"[{name}](https://huggingface.co/{name})"
+    )
+    lang_replacements = {
+        'E': 'English',
+        'P': 'Polish',
+        'm': 'Multilingual'
+    }
+    LB_LLMZSZL["Lang"] = LB_LLMZSZL["Lang"].apply(
+        lambda lang_code: lang_replacements.get(lang_code, lang_code)  # Replace using the dictionary, keep original if not found
+    )
+    # Columns missing from the JSON are silently skipped.
+    ordered_columns = [col for col in order_list if col in LB_LLMZSZL.columns]
+    LB_LLMZSZL = LB_LLMZSZL[ordered_columns]
+    LB_LLMZSZL = LB_LLMZSZL.sort_values(by="Score", ascending=False)
+    return LB_LLMZSZL
+# Runs at import time: the filter_* helpers above read this module-level frame.
+file_path = str(abs_path / "leaderboards/llmzszl.json")
+LB_LLMZSZL = load_json_data(file_path, ORDER_LIST)

src/structures/mat.py ADDED Viewed

	@@ -0,0 +1,42 @@

+import pandas as pd
+from pathlib import Path
+# Define the absolute path to the file
+# Three levels up from src/structures/mat.py, i.e. the directory that holds `leaderboards/`.
+abs_path = Path(__file__).parent.parent.parent
+def load_json_data(file_path):
+    """Build the High School (matura) exam score table from the combined results JSON.
+
+    Reads ``file_path`` with ``pd.read_json``, transposes it so the former
+    column labels become a ``Model`` column, keeps only the columns whose
+    label contains "Egzaminy Maturalne", turns each model id into a markdown
+    link to huggingface.co, multiplies the scores by 100 (rounded to 2
+    decimals) and returns the frame with its score columns sorted by their
+    shortened label.
+    """
+    # Load the JSON data
+    MAT_SCORES = pd.read_json(file_path)
+    # Transpose, then reset the index so the model names become a regular column
+    MAT_SCORES = MAT_SCORES.T.reset_index()
+    # Rename the first column as 'Model' to keep model names visible
+    MAT_SCORES.rename(columns={'index': 'Model'}, inplace=True)
+    # Filter columns that contain 'Egzaminy Maturalne' in the name
+    filtered_columns = ['Model'] + [col for col in MAT_SCORES.columns if "Egzaminy Maturalne" in col]
+    MAT_SCORES = MAT_SCORES[filtered_columns]
+    # Model ids use '__' in place of '/'; restore the slash and link to the Hub page.
+    # NOTE(review): assigning into a column-subset frame may emit pandas'
+    # SettingWithCopyWarning — consider taking an explicit .copy() above.
+    MAT_SCORES["Model"] = MAT_SCORES["Model"].apply(
+        lambda name: f"[{name.replace('__','/')}](https://huggingface.co/{name.replace('__','/')})"
+    )
+    # Coerce to numeric (non-numeric -> NaN), scale by 100, round to 2 decimal places
+    numeric_columns = MAT_SCORES.columns[1:]  # Get all year columns
+    MAT_SCORES[numeric_columns] = MAT_SCORES[numeric_columns].apply(pd.to_numeric, errors='coerce') * 100
+    MAT_SCORES[numeric_columns] = MAT_SCORES[numeric_columns].round(2)
+    # Convert year part in column names to strings for Gradio compatibility
+    # Keeps the text before the first comma minus its first character —
+    # presumably labels look like "(<year>, '<exam type>')"; confirm against the JSON.
+    MAT_SCORES.columns = [col.split(',')[0][1:] if col != 'Model' else col for col in MAT_SCORES.columns]
+    year_columns = MAT_SCORES.columns[1:]
+    sorted_year_columns = sorted(year_columns.astype(str).tolist())  # Sort the year columns as strings
+    sorted_columns = ['Model'] + sorted_year_columns
+    MAT_SCORES = MAT_SCORES[sorted_columns]
+    return MAT_SCORES
+# Define file path
+# Runs at import time: loads the table, then replaces it with a pandas Styler.
+file_path = str(abs_path / "leaderboards/all_types_years.json")
+MAT_SCORES = load_json_data(file_path)
+# Highlight the maximum in each of the last 22 columns and format values with
+# precision 2. The hard-coded 22 must match the number of score columns
+# (app.py declares 22 "number" columns for this tab).
+MAT_SCORES = MAT_SCORES.style.highlight_max(
+    color = '#ff7070',
+    subset=MAT_SCORES.columns[-22:]).format(precision=2)

src/structures/osm.py ADDED Viewed

	@@ -0,0 +1,42 @@

+import pandas as pd
+from pathlib import Path
+# Define the absolute path to the file
+# Three levels up from src/structures/osm.py, i.e. the directory that holds `leaderboards/`.
+abs_path = Path(__file__).parent.parent.parent
+def load_json_data(file_path):
+    """Build the 8th-grade exam score table from the combined results JSON.
+
+    Reads ``file_path`` with ``pd.read_json``, transposes it so the former
+    column labels become a ``Model`` column, keeps only the columns whose
+    label contains "Egzaminy Ósmoklasisty", turns each model id into a
+    markdown link to huggingface.co, multiplies the scores by 100 (rounded
+    to 2 decimals) and returns the frame with its score columns sorted by
+    their shortened label.
+    """
+    # Load the JSON data
+    OSM_SCORES = pd.read_json(file_path)
+    # Transpose, then reset the index so the model names become a regular column
+    OSM_SCORES = OSM_SCORES.T.reset_index()
+    # Rename the first column as 'Model' to keep model names visible
+    OSM_SCORES.rename(columns={'index': 'Model'}, inplace=True)
+    # Filter columns that contain 'Egzaminy Ósmoklasisty' in the name
+    filtered_columns = ['Model'] + [col for col in OSM_SCORES.columns if "Egzaminy Ósmoklasisty" in col]
+    OSM_SCORES = OSM_SCORES[filtered_columns]
+    # Model ids use '__' in place of '/'; restore the slash and link to the Hub page.
+    # NOTE(review): assigning into a column-subset frame may emit pandas'
+    # SettingWithCopyWarning — consider taking an explicit .copy() above.
+    OSM_SCORES["Model"] = OSM_SCORES["Model"].apply(
+        lambda name: f"[{name.replace('__','/')}](https://huggingface.co/{name.replace('__','/')})"
+    )
+    # Coerce to numeric (non-numeric -> NaN), scale by 100, round to 2 decimal places
+    numeric_columns = OSM_SCORES.columns[1:]  # Get all year columns
+    OSM_SCORES[numeric_columns] = OSM_SCORES[numeric_columns].apply(pd.to_numeric, errors='coerce') * 100
+    OSM_SCORES[numeric_columns] = OSM_SCORES[numeric_columns].round(2)
+    # Convert year part in column names to strings for Gradio compatibility
+    # Keeps the text before the first comma minus its first character —
+    # presumably labels look like "(<year>, '<exam type>')"; confirm against the JSON.
+    OSM_SCORES.columns = [col.split(',')[0][1:] if col != 'Model' else col for col in OSM_SCORES.columns]
+    year_columns = OSM_SCORES.columns[1:]
+    sorted_year_columns = sorted(year_columns.astype(str).tolist())  # Sort the year columns as strings
+    sorted_columns = ['Model'] + sorted_year_columns
+    OSM_SCORES = OSM_SCORES[sorted_columns]
+    return OSM_SCORES
+# Define file path
+# Runs at import time: loads the table, then replaces it with a pandas Styler.
+file_path = str(abs_path / "leaderboards/all_types_years.json")
+OSM_SCORES = load_json_data(file_path)
+# Highlight the maximum in each of the last 5 columns and format values with
+# precision 2. The hard-coded 5 must match the number of score columns
+# (app.py declares 5 "number" columns for this tab).
+OSM_SCORES = OSM_SCORES.style.highlight_max(
+    color = '#ff7070',
+    subset=OSM_SCORES.columns[-5:]).format(precision=2)

src/structures/zaw.py ADDED Viewed

	@@ -0,0 +1,42 @@

+import pandas as pd
+from pathlib import Path
+# Define the absolute path to the file
+# Three levels up from src/structures/zaw.py, i.e. the directory that holds `leaderboards/`.
+abs_path = Path(__file__).parent.parent.parent
+def load_json_data(file_path):
+    """Build the Professional (vocational) exam score table from the combined results JSON.
+
+    Reads ``file_path`` with ``pd.read_json``, transposes it so the former
+    column labels become a ``Model`` column, keeps only the columns whose
+    label contains "Egzaminy Zawodowe", turns each model id into a markdown
+    link to huggingface.co, multiplies the scores by 100 (rounded to 2
+    decimals) and returns the frame with its score columns sorted by their
+    shortened label.
+    """
+    # Load the JSON data
+    ZAW_SCORES = pd.read_json(file_path)
+    # Transpose, then reset the index so the model names become a regular column
+    ZAW_SCORES = ZAW_SCORES.T.reset_index()
+    # Rename the first column as 'Model' to keep model names visible
+    ZAW_SCORES.rename(columns={'index': 'Model'}, inplace=True)
+    # Filter columns that contain 'Egzaminy Zawodowe' in the name
+    filtered_columns = ['Model'] + [col for col in ZAW_SCORES.columns if "Egzaminy Zawodowe" in col]
+    ZAW_SCORES = ZAW_SCORES[filtered_columns]
+    # Model ids use '__' in place of '/'; restore the slash and link to the Hub page.
+    # NOTE(review): assigning into a column-subset frame may emit pandas'
+    # SettingWithCopyWarning — consider taking an explicit .copy() above.
+    ZAW_SCORES["Model"] = ZAW_SCORES["Model"].apply(
+        lambda name: f"[{name.replace('__','/')}](https://huggingface.co/{name.replace('__','/')})"
+    )
+    # Coerce to numeric (non-numeric -> NaN), scale by 100, round to 2 decimal places
+    numeric_columns = ZAW_SCORES.columns[1:]  # Get all year columns
+    ZAW_SCORES[numeric_columns] = ZAW_SCORES[numeric_columns].apply(pd.to_numeric, errors='coerce') * 100
+    ZAW_SCORES[numeric_columns] = ZAW_SCORES[numeric_columns].round(2)
+    # Convert year part in column names to strings for Gradio compatibility
+    # Keeps the text before the first comma minus its first character —
+    # presumably labels look like "(<year>, '<exam type>')"; confirm against the JSON.
+    ZAW_SCORES.columns = [col.split(',')[0][1:] if col != 'Model' else col for col in ZAW_SCORES.columns]
+    year_columns = ZAW_SCORES.columns[1:]
+    sorted_year_columns = sorted(year_columns.astype(str).tolist())  # Sort the year columns as strings
+    sorted_columns = ['Model'] + sorted_year_columns
+    ZAW_SCORES = ZAW_SCORES[sorted_columns]
+    return ZAW_SCORES
+# Define file path
+# Runs at import time: loads the table, then replaces it with a pandas Styler.
+file_path = str(abs_path / "leaderboards/all_types_years.json")
+ZAW_SCORES = load_json_data(file_path)
+# Highlight the maximum in each of the last 12 columns and format values with
+# precision 2. The hard-coded 12 must match the number of score columns
+# (app.py declares 12 "number" columns for this tab).
+ZAW_SCORES = ZAW_SCORES.style.highlight_max(
+    color = '#ff7070',
+    subset=ZAW_SCORES.columns[-12:]).format(precision=2)

src/styles.py ADDED Viewed

	@@ -0,0 +1,108 @@

+# Stylesheet passed to gr.Blocks(css=...) in app.py.
+# Of the selectors below, app.py as shown only creates `.tab-buttons`
+# (elem_classes) and `#citation-button` (elem_id); the remaining id selectors
+# have no matching element in that file.
+# NOTE(review): `padding:0.5;` in #filter-columns-type / #filter-columns-size
+# has no unit, which is not a valid CSS length — confirm the intended unit.
+custom_css = """
+.markdown-text {
+    font-size: 16px !important;
+}
+#models-to-add-text {
+    font-size: 18px !important;
+}
+#citation-button span {
+    font-size: 16px !important;
+}
+#citation-button textarea {
+    font-size: 16px !important;
+}
+#citation-button > label > button {
+    margin: 6px;
+    transform: scale(1.3);
+}
+#leaderboard-table {
+    margin-top: 15px
+}
+#leaderboard-table-lite {
+    margin-top: 15px
+}
+#search-bar-table-box > div:first-child {
+    background: none;
+    border: none;
+}
+#search-bar {
+    padding: 0px;
+}
+/* Hides the final AutoEvalColumn */
+#llm-benchmark-tab-table table td:last-child,
+#llm-benchmark-tab-table table th:last-child {
+    display: none;
+}
+/* Limit the width of the first AutoEvalColumn so that names don't expand too much */
+table td:first-child,
+table th:first-child {
+    max-width: 400px;
+    overflow: auto;
+    white-space: nowrap;
+}
+table > tbody > tr > td:nth-child(3) > div {
+    overflow-x: auto;
+    width: 450px;
+}
+tbody span {
+    -webkit-user-select: text;
+    -moz-user-select: text;
+    -ms-user-select: text;
+    user-select: text;
+}
+.tab-buttons button {
+    font-size: 20px;
+}
+#scale-logo {
+    border-style: none !important;
+    box-shadow: none;
+    display: block;
+    margin-left: auto;
+    margin-right: auto;
+    max-width: 600px;
+}
+#scale-logo .download {
+    display: none;
+}
+#filter_type{
+    border: 0;
+    padding-left: 0;
+    padding-top: 0;
+}
+#filter_type label {
+    display: flex;
+}
+#filter_type label > span{
+    margin-top: var(--spacing-lg);
+    margin-right: 0.5em;
+}
+#filter_type label > .wrap{
+    width: 103px;
+}
+#filter_type label > .wrap .wrap-inner{
+    padding: 2px;
+}
+#filter_type label > .wrap .wrap-inner input{
+    width: 1px
+}
+#filter-columns-type{
+    border:0;
+    padding:0.5;
+}
+#filter-columns-size{
+    border:0;
+    padding:0.5;
+}
+#box-filter > .form{
+    border: 0
+}
+"""
+# JavaScript snippet (as a string): reads the page's query string and returns
+# it as a plain object of key/value pairs.
+# NOTE(review): not referenced by the app.py shown in this commit.
+get_window_url_params = """
+    function(url_params) {
+        const params = new URLSearchParams(window.location.search);
+        url_params = Object.fromEntries(params);
+        return url_params;
+    }
+    """

src/utils.py ADDED Viewed

	@@ -0,0 +1 @@


1	+