Spaces:

vectara
/

leaderboard

Running on CPU Upgrade

App Files Files Community

ofermend committed on Dec 10, 2025

Commit

7cd85bf

1 Parent(s): a1ac629

updated visuals to better reflect answer rate

Browse files

Files changed (3) hide show

app/app.py +12 -7
app/app_utils.py +64 -37
app/requirements.txt +2 -1

app/app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import gradio as gr
 import pandas as pd
-import matplotlib.pyplot
 from app_utils import load_results, visualize_leaderboard
@@ -45,12 +45,16 @@ def leaderboard(
     if len(df) == 0:
         # Show "no results" message in the plot
-        fig, ax = matplotlib.pyplot.subplots(figsize=(10, 5))
-        ax.text(0.5, 0.5, "No models found matching your filter",
-                ha='center', va='center', fontsize=14, color='gray')
-        ax.set_xlim(0, 1)
-        ax.set_ylim(0, 1)
-        ax.axis('off')
         return fig, pd.DataFrame(columns=["LLM", "Hallucination %", "Answer %", "Avg Summary Words"])
     fig = visualize_leaderboard(df)
@@ -71,6 +75,7 @@ with gr.Blocks(
         height: 40px;
     }
     footer { display: none !important; }
     """
 ) as demo:
     gr.HTML(

 import gradio as gr
 import pandas as pd
+import plotly.graph_objects as go
 from app_utils import load_results, visualize_leaderboard
     if len(df) == 0:
         # Show "no results" message in the plot
+        fig = go.Figure()
+        fig.add_annotation(
+            text="No models found matching your filter",
+            xref="paper", yref="paper", x=0.5, y=0.5,
+            showarrow=False, font=dict(size=14, color="gray")
+        )
+        fig.update_layout(
+            xaxis=dict(visible=False), yaxis=dict(visible=False),
+            height=400, margin=dict(l=50, r=50, t=50, b=50)
+        )
         return fig, pd.DataFrame(columns=["LLM", "Hallucination %", "Answer %", "Avg Summary Words"])
     fig = visualize_leaderboard(df)
         height: 40px;
     }
     footer { display: none !important; }
+    .modebar { display: none !important; }
     """
 ) as demo:
     gr.HTML(

app/app_utils.py CHANGED Viewed

@@ -3,11 +3,11 @@ import os
 import json
 from huggingface_hub import snapshot_download
 import pandas as pd
-import matplotlib.pyplot as plt
-import matplotlib.figure
 from datetime import datetime
 from sklearn.preprocessing import MinMaxScaler
-import matplotlib.patheffects as pe
 min_max_scaler = MinMaxScaler()
@@ -140,45 +140,72 @@ def determine_llm_x_position_and_font_color(LLM: str, hallucination_percent: flo
     else: # to the right of the bar, black anyway
         return hallucination_percent, 'black'
-def visualize_leaderboard(df: pd.DataFrame) -> matplotlib.figure.Figure:
-    fig = plt.figure(figsize=(10, 5))
     plot_df = df.head(10).copy()
-    plot_df["normalized_hallucination_rate"] = min_max_scaler.fit_transform(plot_df[["Hallucination %"]])
-    # Reverse order so lowest hallucination is at top
-    plot_df = plot_df.iloc[::-1]
-    y_positions = range(len(plot_df))
-    plt.barh(y_positions, plot_df["Hallucination %"], color=plt.cm.RdYlGn_r(plot_df["normalized_hallucination_rate"]))
-    # Add value labels to the right of bars and answer rate dots at bar end
-    for i, row in enumerate(plot_df.itertuples()):
-        plt.text(row._2 + 0.2, i, f"{row._2}%", ha='left', va='center', fontsize=8, fontweight='bold')
-        # Answer rate indicator - colored dot at end of bar
-        ar_dot_color = '#22aa22' if row._3 >= 95 else '#cc3333'
-        plt.scatter(row._2, i, color=ar_dot_color, s=25, zorder=5)
-    # Strip org prefix (e.g., "google/gemini-2.5" -> "gemini-2.5")
     labels = [name.split("/")[-1] for name in plot_df["LLM"]]
-    plt.yticks(y_positions, labels, fontsize=8)
-    plt.xlabel("Hallucination Rate", fontsize=10)
-    plt.title("Grounded Hallucination Rate of Best LLMs", fontsize=12)
-    plt.gca().spines['top'].set_visible(False)
-    plt.gca().spines['right'].set_visible(False)
-    # Add legend for answer rate dots
-    plt.scatter([], [], color='#22aa22', s=25, label='≥95%')
-    plt.scatter([], [], color='#cc3333', s=25, label='<95%')
-    plt.legend(loc='upper right', fontsize=8, framealpha=0.9, title='Answer Rate', title_fontsize=8)
-    plt.tight_layout()
-    plt.subplots_adjust(left=0.25, bottom=0.15)
-    # Add copyright at bottom
-    plt.figtext(0.5, 0.02, f"Copyright (2025) Vectara, Inc. - Plot generated on {datetime.now().strftime('%B %d, %Y')}",
-                ha='center', fontsize=10)
     return fig
 # %%

 import json
 from huggingface_hub import snapshot_download
 import pandas as pd
+import matplotlib.cm as cm
+from matplotlib.colors import to_hex
+import plotly.graph_objects as go
 from datetime import datetime
 from sklearn.preprocessing import MinMaxScaler
 min_max_scaler = MinMaxScaler()
     else: # to the right of the bar, black anyway
         return hallucination_percent, 'black'
+def visualize_leaderboard(df: pd.DataFrame) -> go.Figure:
+    """Create interactive horizontal bar chart with hatched (striped) bars for low answer rate."""
     plot_df = df.head(10).copy()
+    plot_df["normalized_hallucination_rate"] = min_max_scaler.fit_transform(
+        plot_df[["Hallucination %"]]
+    )
+    plot_df = plot_df.iloc[::-1]  # Reverse for bottom-to-top display
+    # Strip org prefix for labels
     labels = [name.split("/")[-1] for name in plot_df["LLM"]]
+    # Calculate colors (RdYlGn_r) and patterns (hatched for low AR)
+    colors = []
+    patterns = []
+    for _, row in plot_df.iterrows():
+        colors.append(to_hex(cm.RdYlGn_r(row["normalized_hallucination_rate"])))
+        patterns.append("/" if row["Answer %"] < 95 else "")
+    # Hover text with full details
+    hover_texts = [
+        f"<b>{label}</b><br>"
+        f"Hallucination Rate: {row['Hallucination %']}%<br>"
+        f"Answer Rate: {row['Answer %']}%"
+        + (" ✓" if row["Answer %"] >= 95 else " (below 95%)")
+        for label, (_, row) in zip(labels, plot_df.iterrows())
+    ]
+    fig = go.Figure()
+    fig.add_trace(go.Bar(
+        y=labels,
+        x=plot_df["Hallucination %"],
+        orientation='h',
+        marker=dict(
+            color=colors,
+            pattern_shape=patterns,
+            pattern_fillmode="overlay",
+            line=dict(width=0)
+        ),
+        text=[f"{val}%" for val in plot_df["Hallucination %"]],
+        textposition='outside',
+        textfont=dict(size=10, color='black'),
+        hovertemplate="%{customdata}<extra></extra>",
+        customdata=hover_texts
+    ))
+    # Title with copyright
+    title_text = (
+        f"Grounded Hallucination Rate of Best LLMs · "
+        f"© {datetime.now().year} Vectara · Created {datetime.now().strftime('%B %d, %Y')}"
+    )
+    fig.update_layout(
+        title=dict(text=title_text, font=dict(size=13), x=0.5, xanchor='center'),
+        xaxis=dict(title="Hallucination Rate", range=[0, max(plot_df["Hallucination %"]) * 1.15]),
+        yaxis=dict(title=""),
+        showlegend=False,
+        height=400,
+        margin=dict(l=180, r=50, t=50, b=40),
+        annotations=[
+            dict(
+                text="Striped = Answer Rate < 95%",
+                xref="paper", yref="paper", x=1.0, y=0.98,
+                showarrow=False, font=dict(size=10, color="gray"), xanchor="right", yanchor="top"
+            )
+        ]
+    )
     return fig
 # %%

app/requirements.txt CHANGED Viewed

@@ -3,4 +3,5 @@ requests==2.32.5
 pandas==2.2.3
 huggingface_hub>=0.20.0
 matplotlib==3.10.3
-scikit-learn==1.6.1

 pandas==2.2.3
 huggingface_hub>=0.20.0
 matplotlib==3.10.3
+scikit-learn==1.6.1
+plotly>=5.18.0