Arabic-MMMLU-Leaderborad

Running

App Files Files Community

Omartificial-Intelligence-Space committed on Feb 27, 2025

Commit

e1da145

verified ·

1 Parent(s): 64f6484

update app.py

Browse files

Files changed (1) hide show

app.py +90 -10

app.py CHANGED Viewed

@@ -1,5 +1,4 @@
 import gradio as gr
-from gradio_leaderboard import Leaderboard
 import pandas as pd
 import os
 import json
@@ -10,7 +9,7 @@ from src.envs import EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH
 # Ensure directories exist
 os.makedirs(EVAL_RESULTS_PATH, exist_ok=True)
-# Minimal CSS to avoid conflicts
 minimal_css = """
 .container {
     max-width: 1200px;
@@ -26,7 +25,6 @@ try:
     # Load the leaderboard DataFrame
     LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
     print("LEADERBOARD_DF Shape:", LEADERBOARD_DF.shape)
-    print("Sample row:", LEADERBOARD_DF.iloc[0].to_dict() if not LEADERBOARD_DF.empty else "Empty DataFrame")
     # If DataFrame is empty, create a sample
     if LEADERBOARD_DF.empty:
@@ -45,7 +43,29 @@ except Exception as e:
         "average": 0
     }])
-# Create a very simple app with just the leaderboard
 with gr.Blocks(css=minimal_css) as demo:
     gr.HTML("<div class='header'><h1>ILMAAM: Index for Language Models for Arabic Assessment on Multitasks</h1></div>")
@@ -53,15 +73,75 @@ with gr.Blocks(css=minimal_css) as demo:
         with gr.TabItem("LLM Benchmark"):
             # Add debug output
             with gr.Accordion("Debug Info", open=True):
-                gr.Markdown(f"DataFrame Shape: {LEADERBOARD_DF.shape}")
-                gr.Markdown(f"Column Names: {', '.join(LEADERBOARD_DF.columns[:10])}...")
-            # Create a simplified version of the leaderboard
-            leaderboard = Leaderboard(
-                value=LEADERBOARD_DF,
-                interactive=True,
             )
         with gr.TabItem("About"):
             gr.Markdown("This is a benchmark for Arabic language models.")

 import gradio as gr
 import pandas as pd
 import os
 import json
 # Ensure directories exist
 os.makedirs(EVAL_RESULTS_PATH, exist_ok=True)
+# Minimal CSS
 minimal_css = """
 .container {
     max-width: 1200px;
     # Load the leaderboard DataFrame
     LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
     print("LEADERBOARD_DF Shape:", LEADERBOARD_DF.shape)
     # If DataFrame is empty, create a sample
     if LEADERBOARD_DF.empty:
         "average": 0
     }])
+# Columns shown in the table: the two core columns come first, followed by
+# whichever optional subject / metadata columns the loaded DataFrame has.
+subject_cols = ["abstract_algebra", "anatomy", "astronomy", "business_ethics"]
+meta_cols = ["model_type", "precision", "weight_type", "license"]
+display_cols = ["model_name", "average"]
+display_cols.extend(
+    name for name in subject_cols + meta_cols if name in LEADERBOARD_DF.columns
+)
+# The core columns are not checked above, so filter once more before indexing
+# to keep only names that are really present.
+actual_display_cols = [name for name in display_cols if name in LEADERBOARD_DF.columns]
+display_df = LEADERBOARD_DF[actual_display_cols].copy()
+# Round every numeric column to two decimals for display
+for col in display_df.columns:
+    if not pd.api.types.is_numeric_dtype(display_df[col]):
+        continue
+    display_df[col] = display_df[col].round(2)
+# Create a very simple app using standard DataTable instead of Leaderboard
 with gr.Blocks(css=minimal_css) as demo:
     gr.HTML("<div class='header'><h1>ILMAAM: Index for Language Models for Arabic Assessment on Multitasks</h1></div>")
         with gr.TabItem("LLM Benchmark"):
             # Add debug output
             with gr.Accordion("Debug Info", open=True):
+                gr.Markdown(f"DataFrame Shape: {display_df.shape}")
+                gr.Markdown(f"Column Names: {', '.join(display_df.columns)}")
+            # Use standard DataTable instead of Leaderboard
+            datatable = gr.DataFrame(
+                value=display_df,
+                interactive=False,
+                wrap=True,
+                column_widths=[200] + [100] * (len(actual_display_cols) - 1)
             )
+            # Add filter functionality using dropdowns
+            with gr.Row():
+                if "model_type" in display_df.columns:
+                    model_types = ["All"] + sorted(display_df["model_type"].unique().tolist())
+                    model_type_filter = gr.Dropdown(
+                        choices=model_types,
+                        value="All",
+                        label="Filter by Model Type",
+                        interactive=True
+                    )
+                if "precision" in display_df.columns:
+                    precisions = ["All"] + sorted(display_df["precision"].unique().tolist())
+                    precision_filter = gr.Dropdown(
+                        choices=precisions,
+                        value="All",
+                        label="Filter by Precision",
+                        interactive=True
+                    )
+                search_input = gr.Textbox(
+                    label="Search by Model Name",
+                    placeholder="Enter model name...",
+                    interactive=True
+                )
+            # Filter function
+            def filter_data(model_type, precision, search):
+                """Return the rows of ``display_df`` that match the active filters.
+
+                Args:
+                    model_type: Selected model type; "All" or an empty selection
+                        skips this filter.
+                    precision: Selected precision; "All" or an empty selection
+                        skips this filter.
+                    search: Text looked up in ``model_name`` (case-insensitive,
+                        matched literally); an empty value skips the search.
+
+                Returns:
+                    A filtered copy of ``display_df``; ``display_df`` itself is
+                    never modified.
+                """
+                filtered_df = display_df.copy()
+                # A cleared dropdown arrives as an empty value; treat it like "All"
+                # instead of filtering every row away.
+                if model_type and model_type != "All" and "model_type" in filtered_df.columns:
+                    filtered_df = filtered_df[filtered_df["model_type"] == model_type]
+                if precision and precision != "All" and "precision" in filtered_df.columns:
+                    filtered_df = filtered_df[filtered_df["precision"] == precision]
+                if search and "model_name" in filtered_df.columns:
+                    # regex=False: the user's text is matched literally, so input
+                    # such as "(" or "c++" cannot raise a regex error.
+                    # na=False: rows without a name count as non-matching, which
+                    # keeps the boolean mask free of NaN.
+                    name_matches = filtered_df["model_name"].str.contains(
+                        search, case=False, regex=False, na=False
+                    )
+                    filtered_df = filtered_df[name_matches]
+                return filtered_df
+            # Connect filters
+            # filter_data always takes (model_type, precision, search). When a
+            # dropdown was not created because its column is missing, pass a
+            # constant "All" through gr.State so the argument positions still
+            # line up instead of calling filter_data with too few arguments.
+            if "model_type" in display_df.columns:
+                model_type_input = model_type_filter
+            else:
+                model_type_input = gr.State("All")
+            if "precision" in display_df.columns:
+                precision_input = precision_filter
+            else:
+                precision_input = gr.State("All")
+            filter_inputs = [model_type_input, precision_input, search_input]
+            # Every visible filter refreshes the table; the gr.State placeholders
+            # never change, so no listener is attached to them.
+            for input_component in filter_inputs:
+                if isinstance(input_component, gr.State):
+                    continue
+                input_component.change(
+                    filter_data,
+                    inputs=filter_inputs,
+                    outputs=datatable
+                )
         with gr.TabItem("About"):
             gr.Markdown("This is a benchmark for Arabic language models.")