lingoly-too

Running

Jude Khouja committed on Mar 5

Commit

6226c1b

1 Parent(s): 36ce9ab

Change description and change color of baseline scores

Files changed (3) hide show

data_loader.py CHANGED Viewed

@@ -269,8 +269,8 @@ HEADER_CONTENT = (
         </div>
         <div class="description">
-            LingOly-TOO (L2) is a challenging reasoning benchmark designed to minimize the chance of answering by guessing.
-            It is developed by rewriting (obfuscating) parts of questions and answers so that the chance of leakage in training data is minimum.
             <div class="highlight-question">
                 "How do top LLMs reason on unseen linguistic questions?"
             </div>

         </div>
         <div class="description">
+            LingOly-TOO (L2) is a challenging linguistics reasoning benchmark designed to counteract answering without reasoning (e.g. by guessing or memorizing answers).
+            We permute <b>Ling</b>uistics <b>Oly</b>mpiad problems with <b>T</b>emplates and <b>O</b>rthographic <b>O</b>bfuscations. By rewriting (obfuscating) parts of questions and answers, the chance of benchmark leakage in training data is minimized.
             <div class="highlight-question">
                 "How do top LLMs reason on unseen linguistic questions?"
             </div>

tabs/leaderboard.py CHANGED Viewed

@@ -3,13 +3,14 @@ from data_loader import METHODOLOGY
 from utils import (
     get_rank_badge,
     get_score_bar,
     get_type_badge,
 )
 def filter_leaderboard(df, sort_by):
     filtered_df = df.copy()
-    if sort_by == "Score after obfuscation":
         filtered_df = filtered_df.sort_values(by="Obfuscated score", ascending=False)
     else:
         filtered_df = filtered_df.sort_values(by="Baseline score", ascending=False)
@@ -129,7 +130,7 @@ def filter_leaderboard(df, sort_by):
                 <td class="vendor-cell">{row['Provider']}</td>
                 <td>{get_type_badge(row['Type'])}</td>
                 <td class="score-cell">{get_score_bar(row['Obfuscated score'])}</td>
-                <td class="score-cell">{get_score_bar(row['Baseline score'])}</td>
             </tr>
         """
@@ -143,8 +144,8 @@ def create_leaderboard_tab(df, HEADER_CONTENT, CARDS):
     with gr.Row(equal_height=True):
         with gr.Column(scale=0.4):
             sort_by = gr.Dropdown(
-                choices=["Score after obfuscation", "Score on all"],
-                value="Score after obfuscation",
                 label="Sort by",
             )

 from utils import (
     get_rank_badge,
     get_score_bar,
+    get_score_bar_secondary,
     get_type_badge,
 )
 def filter_leaderboard(df, sort_by):
     filtered_df = df.copy()
+    if sort_by == "Score on obfuscated questions":
         filtered_df = filtered_df.sort_values(by="Obfuscated score", ascending=False)
     else:
         filtered_df = filtered_df.sort_values(by="Baseline score", ascending=False)
                 <td class="vendor-cell">{row['Provider']}</td>
                 <td>{get_type_badge(row['Type'])}</td>
                 <td class="score-cell">{get_score_bar(row['Obfuscated score'])}</td>
+                <td class="score-cell">{get_score_bar_secondary(row['Baseline score'])}</td>
             </tr>
         """
     with gr.Row(equal_height=True):
         with gr.Column(scale=0.4):
             sort_by = gr.Dropdown(
+                choices=["Score on obfuscated questions", "Score on all questions"],
+                value="Score on obfuscated questions",
                 label="Sort by",
             )

utils.py CHANGED Viewed

@@ -67,6 +67,37 @@ def get_score_bar(score):
             ">{width:.1f}</span>
         </div>
     """
 def get_chart_colors():
     # if is_dark_theme():
     #     return {

             ">{width:.1f}</span>
         </div>
     """
+def get_score_bar_secondary(score):
+    """Generate HTML for score bar with gradient styling"""
+    width = score * 100
+    return f"""
+        <div style="display: flex; align-items: center; gap: 12px; width: 100%;">
+            <div style="
+                flex-grow: 1;
+                height: 8px;
+                background: var(--score-bg, rgba(255, 255, 255, 0.1));
+                border-radius: 4px;
+                overflow: hidden;
+                max-width: 200px;
+            ">
+                <div style="
+                    width: {width}%;
+                    height: 100%;
+                    background: linear-gradient(90deg, var(--accent-gray, #1f2937), var(--accent-gray-light, #9ca3af));
+                    border-radius: 4px;
+                    transition: width 0.3s ease;
+                "></div>
+            </div>
+            <span style="
+                font-family: 'SF Mono', monospace;
+                font-weight: 600;
+                color: var(--text-primary, #ffffff);
+                min-width: 60px;
+            ">{width:.1f}</span>
+        </div>
+    """
 def get_chart_colors():
     # if is_dark_theme():
     #     return {