Spaces:

MaziyarPanahi
/

FACTS-Leaderboard

Running

App Files Files Community

MaziyarPanahi committed on May 28

Commit

09aab35

1 Parent(s): 59f16cb

new table

Browse files

Files changed (1) hide show

app.py +176 -61

app.py CHANGED Viewed

@@ -3,20 +3,20 @@ import pandas as pd
 import numpy as np
 # Load data from TSV file
-df = pd.read_csv('FACTS.tsv', sep='\t')
 # Clean up the data
 df = df.dropna()  # Remove any rows with missing values
 df.columns = df.columns.str.strip()  # Remove any whitespace from column names
 # Rename columns to match our expected format
-df = df.rename(columns={
-    'model': 'Model Name',
-    'size': 'Size'
-})
 # Create size display format
-df["Size_Display"] = df["Size"].apply(lambda x: f"{int(x)}B" if x == int(x) else f"{x}B")
 # Add size category for filtering
 def get_size_category(size):
@@ -33,6 +33,7 @@ def get_size_category(size):
     else:
         return ">80B"
 df["Size_Category"] = df["Size"].apply(get_size_category)
@@ -77,11 +78,58 @@ def filter_and_search_models(search_query, size_ranges, sort_by):
     # Round numerical values for better display
     for col in ["Separate Grounding Score", "Separate Quality Score", "Combined Score"]:
         display_df = display_df.copy()  # Create a copy to avoid SettingWithCopyWarning
-        display_df[col] = display_df[col].round(6)
     return display_df
 # Create the Gradio interface
 with gr.Blocks(title="FACTS Grounding Leaderboard", theme=gr.themes.Base()) as app:
     gr.Markdown("# 🏆 FACTS Grounding Leaderboard")
@@ -127,33 +175,26 @@ with gr.Blocks(title="FACTS Grounding Leaderboard", theme=gr.themes.Base()) as a
             total_models = gr.Markdown(f"**Showing {len(df)} models**")
             # Results table below filters
-            results_table = gr.Dataframe(
-                value=filter_and_search_models(
-                    "",
-                    ["0-5B", "5-10B", "10-20B", "20-40B", "40-80B", ">80B"],
-                    "Combined Score",
                 ),
-                headers=[
-                    "Rank",
-                    "Model Name",
-                    "Size",
-                    "Separate Grounding Score",
-                    "Separate Quality Score",
-                    "Combined Score",
-                ],
-                datatype=["number", "str", "str", "number", "number", "number"],
                 elem_id="leaderboard-table",
-                interactive=False,
-                wrap=True,
             )
             # Metric explanations at the bottom
             with gr.Accordion("Metric Explanations", open=False):
-                gr.Markdown("""
-                - **Grounding Score**: Measures the model's ability to provide factually accurate responses based on given context
-                - **Quality Score**: Evaluates the overall quality of the model's responses including coherence and relevance
-                - **Combined Score**: A weighted combination of grounding and quality scores representing overall performance
-                """)
         with gr.TabItem("About"):
             gr.Markdown(
@@ -206,7 +247,7 @@ with gr.Blocks(title="FACTS Grounding Leaderboard", theme=gr.themes.Base()) as a
     def update_table(search, sizes, sort_by):
         filtered_df = filter_and_search_models(search, sizes, sort_by)
         model_count = f"**Showing {len(filtered_df)} models**"
-        return filtered_df, model_count
     # Connect all inputs to the update function
     search_box.change(
@@ -229,14 +270,46 @@ with gr.Blocks(title="FACTS Grounding Leaderboard", theme=gr.themes.Base()) as a
     # Add custom CSS for better styling
     app.css = """
-    #leaderboard-table {
-        font-size: 14px;
         margin-top: 20px;
         max-height: 600px;
         overflow-y: auto;
     }
-    #leaderboard-table td:first-child {
         text-align: center;
         font-weight: 600;
         color: #444;
@@ -244,64 +317,106 @@ with gr.Blocks(title="FACTS Grounding Leaderboard", theme=gr.themes.Base()) as a
         width: 60px;
     }
-    #leaderboard-table td:nth-child(2) {
         font-weight: 500;
         max-width: 400px;
     }
-    #leaderboard-table td:nth-child(3) {
         text-align: center;
         font-weight: 500;
         color: #666;
     }
-    #leaderboard-table td:nth-child(n+4) {
         text-align: center;
     }
-    .size-filter {
-        display: flex;
-        flex-wrap: wrap;
-        gap: 15px;
-        margin-top: 10px;
     }
-    .size-filter label {
-        display: flex;
-        align-items: center;
-        margin: 0;
     }
-    .size-filter input[type="checkbox"] {
-        margin-right: 5px;
     }
-    /* Highlight rows based on model family */
-    #leaderboard-table tr:has(td:contains("meta-llama")) {
-        background-color: #fffbf0;
     }
-    #leaderboard-table tr:has(td:contains("deepseek")) {
-        background-color: #f0f8ff;
     }
-    #leaderboard-table tr:has(td:contains("Qwen")) {
-        background-color: #f5fff5;
     }
-    #leaderboard-table tr:has(td:contains("google")) {
         background-color: #fff0f5;
     }
-    /* Header styling */
-    #leaderboard-table th {
-        background-color: #f8f9fa;
-        font-weight: 600;
     }
-    #leaderboard-table th:first-child {
-        width: 60px;
-        text-align: center;
     }
     """

 import numpy as np
 # Load data from TSV file
+df = pd.read_csv("FACTS.tsv", sep="\t")
 # Clean up the data
 df = df.dropna()  # Remove any rows with missing values
 df.columns = df.columns.str.strip()  # Remove any whitespace from column names
 # Rename columns to match our expected format
+df = df.rename(columns={"model": "Model Name", "size": "Size"})
 # Create size display format
+df["Size_Display"] = df["Size"].apply(
+    lambda x: f"{int(x)}B" if x == int(x) else f"{x}B"
+)
 # Add size category for filtering
 def get_size_category(size):
     else:
         return ">80B"
 df["Size_Category"] = df["Size"].apply(get_size_category)
     # Round numerical values for better display
     for col in ["Separate Grounding Score", "Separate Quality Score", "Combined Score"]:
         display_df = display_df.copy()  # Create a copy to avoid SettingWithCopyWarning
+        display_df[col] = display_df[col].round(3)  # Reduced to 3 decimal places
     return display_df
+def create_html_table(df):
+    """Render the leaderboard dataframe as an HTML table string.
+
+    Args:
+        df: DataFrame to render. It must contain a "Model Name" column.
+            Columns are emitted in their existing order and styled by
+            position: 0 = rank, 1 = model name, 2 = size, any later
+            column = score.
+
+    Returns:
+        A single HTML string: a ``<table class="leaderboard-table">``
+        wrapped in a ``<div class="leaderboard-container">``. Header and
+        cell text is HTML-escaped.
+    """
+    # Imported locally so this function does not depend on the file's
+    # top-level import block.
+    from html import escape
+
+    # CSS class by column position; anything past the third column is a score.
+    positional_classes = ("rank-cell", "model-cell", "size-cell")
+    # (substring of the model name, row CSS class); the first match wins,
+    # mirroring the order of the original if/elif chain.
+    family_classes = (
+        ("meta-llama", "llama-row"),
+        ("deepseek", "deepseek-row"),
+        ("Qwen", "qwen-row"),
+        ("google", "google-row"),
+    )
+
+    columns = list(df.columns)
+    # Collect fragments and join once instead of repeated string concatenation.
+    parts = [
+        '<div class="leaderboard-container">',
+        '<table class="leaderboard-table">',
+        "<thead><tr>",
+    ]
+    # Header
+    parts.extend(f"<th>{escape(str(col))}</th>" for col in columns)
+    parts.append("</tr></thead>")
+    # Body
+    parts.append("<tbody>")
+    for _, row in df.iterrows():
+        # str() keeps the substring checks safe for non-string model names.
+        model_name = str(row["Model Name"])
+        row_class = next(
+            (css for marker, css in family_classes if marker in model_name),
+            "",
+        )
+        parts.append(f'<tr class="{row_class}">')
+        for i, col in enumerate(columns):
+            if i < len(positional_classes):
+                cell_class = positional_classes[i]
+            else:
+                cell_class = "score-cell"
+            # Escape cell text so characters such as "<" or "&" in the data
+            # cannot break or inject markup.
+            parts.append(
+                f'<td class="{cell_class}">{escape(str(row[col]))}</td>'
+            )
+        parts.append("</tr>")
+    parts.extend(("</tbody>", "</table>", "</div>"))
+    return "".join(parts)
 # Create the Gradio interface
 with gr.Blocks(title="FACTS Grounding Leaderboard", theme=gr.themes.Base()) as app:
     gr.Markdown("# 🏆 FACTS Grounding Leaderboard")
             total_models = gr.Markdown(f"**Showing {len(df)} models**")
             # Results table below filters
+            results_table = gr.HTML(
+                value=create_html_table(
+                    filter_and_search_models(
+                        "",
+                        ["0-5B", "5-10B", "10-20B", "20-40B", "40-80B", ">80B"],
+                        "Combined Score",
+                    )
                 ),
                 elem_id="leaderboard-table",
             )
             # Metric explanations at the bottom
             with gr.Accordion("Metric Explanations", open=False):
+                gr.Markdown(
+                    """
+                - **Grounding Score**: Percentage of responses where all claims are supported by the context
+                - **Quality Score**: Percentage of responses that adequately address the user's request
+                - **Combined Score**: Percentage of responses that pass both quality and grounding checks
+                """
+                )
         with gr.TabItem("About"):
             gr.Markdown(
     def update_table(search, sizes, sort_by):
         filtered_df = filter_and_search_models(search, sizes, sort_by)
         model_count = f"**Showing {len(filtered_df)} models**"
+        return create_html_table(filtered_df), model_count
     # Connect all inputs to the update function
     search_box.change(
     # Add custom CSS for better styling
     app.css = """
+    .leaderboard-container {
         margin-top: 20px;
         max-height: 600px;
         overflow-y: auto;
+        border-radius: 8px;
+        border: 1px solid #e9ecef;
+    }
+    .leaderboard-table {
+        width: 100%;
+        border-collapse: collapse;
+        font-size: 14px;
+        background: white;
     }
+    .leaderboard-table th {
+        background-color: #f8f9fa;
+        font-weight: 600;
+        padding: 12px 8px;
+        text-align: center;
+        border-bottom: 2px solid #dee2e6;
+        position: sticky;
+        top: 0;
+        z-index: 10;
+    }
+    .leaderboard-table th:first-child {
+        width: 60px;
+    }
+    .leaderboard-table td {
+        padding: 10px 8px;
+        border-bottom: 1px solid #f1f3f4;
+    }
+    .leaderboard-table tbody tr:hover {
+        background-color: #f8f9fa;
+    }
+    .rank-cell {
         text-align: center;
         font-weight: 600;
         color: #444;
         width: 60px;
     }
+    .model-cell {
         font-weight: 500;
         max-width: 400px;
+        word-wrap: break-word;
     }
+    .size-cell {
         text-align: center;
         font-weight: 500;
         color: #666;
+        min-width: 60px;
     }
+    .score-cell {
         text-align: center;
+        font-family: 'Monaco', 'Menlo', 'Ubuntu Mono', monospace;
+        font-size: 13px;
     }
+    /* Model family row styling */
+    .llama-row {
+        background-color: #fffbf0;
     }
+    .llama-row:hover {
+        background-color: #fef7e0;
     }
+    .deepseek-row {
+        background-color: #f0f8ff;
     }
+    .deepseek-row:hover {
+        background-color: #e6f3ff;
     }
+    .qwen-row {
+        background-color: #f5fff5;
     }
+    .qwen-row:hover {
+        background-color: #eaffea;
     }
+    .google-row {
         background-color: #fff0f5;
     }
+    .google-row:hover {
+        background-color: #ffe6f0;
     }
+    .size-filter {
+        margin-top: 10px;
+    }
+    .size-filter > div {
+        display: flex !important;
+        flex-wrap: wrap !important;
+        gap: 8px !important;
+        align-items: center !important;
+    }
+    .size-filter label {
+        display: flex !important;
+        align-items: center !important;
+        background: #f8f9fa !important;
+        border: 2px solid #e9ecef !important;
+        border-radius: 8px !important;
+        padding: 8px 12px !important;
+        margin: 0 !important;
+        cursor: pointer !important;
+        transition: all 0.2s ease !important;
+        font-weight: 500 !important;
+        font-size: 14px !important;
+        color: #495057 !important;
+        min-width: 70px !important;
+        justify-content: center !important;
+    }
+    .size-filter label:hover {
+        background: #e9ecef !important;
+        border-color: #6c757d !important;
+    }
+    .size-filter input[type="checkbox"] {
+        display: none !important;
+    }
+    .size-filter input[type="checkbox"]:checked + span {
+        background: #0d6efd !important;
+        color: white !important;
+        border-color: #0d6efd !important;
+    }
+    .size-filter label:has(input[type="checkbox"]:checked) {
+        background: #0d6efd !important;
+        color: white !important;
+        border-color: #0d6efd !important;
+        box-shadow: 0 2px 4px rgba(13, 110, 253, 0.2) !important;
     }
     """