ofermend committed on
Commit
7cd85bf
·
1 Parent(s): a1ac629

updated visuals to better reflect answer rate

Browse files
Files changed (3) hide show
  1. app/app.py +12 -7
  2. app/app_utils.py +64 -37
  3. app/requirements.txt +2 -1
app/app.py CHANGED
@@ -1,6 +1,6 @@
1
  import gradio as gr
2
  import pandas as pd
3
- import matplotlib.pyplot
4
 
5
  from app_utils import load_results, visualize_leaderboard
6
 
@@ -45,12 +45,16 @@ def leaderboard(
45
 
46
  if len(df) == 0:
47
  # Show "no results" message in the plot
48
- fig, ax = matplotlib.pyplot.subplots(figsize=(10, 5))
49
- ax.text(0.5, 0.5, "No models found matching your filter",
50
- ha='center', va='center', fontsize=14, color='gray')
51
- ax.set_xlim(0, 1)
52
- ax.set_ylim(0, 1)
53
- ax.axis('off')
 
 
 
 
54
  return fig, pd.DataFrame(columns=["LLM", "Hallucination %", "Answer %", "Avg Summary Words"])
55
 
56
  fig = visualize_leaderboard(df)
@@ -71,6 +75,7 @@ with gr.Blocks(
71
  height: 40px;
72
  }
73
  footer { display: none !important; }
 
74
  """
75
  ) as demo:
76
  gr.HTML(
 
1
  import gradio as gr
2
  import pandas as pd
3
+ import plotly.graph_objects as go
4
 
5
  from app_utils import load_results, visualize_leaderboard
6
 
 
45
 
46
  if len(df) == 0:
47
  # Show "no results" message in the plot
48
+ fig = go.Figure()
49
+ fig.add_annotation(
50
+ text="No models found matching your filter",
51
+ xref="paper", yref="paper", x=0.5, y=0.5,
52
+ showarrow=False, font=dict(size=14, color="gray")
53
+ )
54
+ fig.update_layout(
55
+ xaxis=dict(visible=False), yaxis=dict(visible=False),
56
+ height=400, margin=dict(l=50, r=50, t=50, b=50)
57
+ )
58
  return fig, pd.DataFrame(columns=["LLM", "Hallucination %", "Answer %", "Avg Summary Words"])
59
 
60
  fig = visualize_leaderboard(df)
 
75
  height: 40px;
76
  }
77
  footer { display: none !important; }
78
+ .modebar { display: none !important; }
79
  """
80
  ) as demo:
81
  gr.HTML(
app/app_utils.py CHANGED
@@ -3,11 +3,11 @@ import os
3
  import json
4
  from huggingface_hub import snapshot_download
5
  import pandas as pd
6
- import matplotlib.pyplot as plt
7
- import matplotlib.figure
 
8
  from datetime import datetime
9
  from sklearn.preprocessing import MinMaxScaler
10
- import matplotlib.patheffects as pe
11
 
12
  min_max_scaler = MinMaxScaler()
13
 
@@ -140,45 +140,72 @@ def determine_llm_x_position_and_font_color(LLM: str, hallucination_percent: flo
140
  else: # to the right of the bar, black anyway
141
  return hallucination_percent, 'black'
142
 
143
- def visualize_leaderboard(df: pd.DataFrame) -> matplotlib.figure.Figure:
144
- fig = plt.figure(figsize=(10, 5))
145
  plot_df = df.head(10).copy()
146
- plot_df["normalized_hallucination_rate"] = min_max_scaler.fit_transform(plot_df[["Hallucination %"]])
147
-
148
- # Reverse order so lowest hallucination is at top
149
- plot_df = plot_df.iloc[::-1]
150
- y_positions = range(len(plot_df))
151
-
152
- plt.barh(y_positions, plot_df["Hallucination %"], color=plt.cm.RdYlGn_r(plot_df["normalized_hallucination_rate"]))
153
-
154
- # Add value labels to the right of bars and answer rate dots at bar end
155
- for i, row in enumerate(plot_df.itertuples()):
156
- plt.text(row._2 + 0.2, i, f"{row._2}%", ha='left', va='center', fontsize=8, fontweight='bold')
157
- # Answer rate indicator - colored dot at end of bar
158
- ar_dot_color = '#22aa22' if row._3 >= 95 else '#cc3333'
159
- plt.scatter(row._2, i, color=ar_dot_color, s=25, zorder=5)
160
 
161
- # Strip org prefix (e.g., "google/gemini-2.5" -> "gemini-2.5")
162
  labels = [name.split("/")[-1] for name in plot_df["LLM"]]
163
- plt.yticks(y_positions, labels, fontsize=8)
164
- plt.xlabel("Hallucination Rate", fontsize=10)
165
- plt.title("Grounded Hallucination Rate of Best LLMs", fontsize=12)
166
-
167
- plt.gca().spines['top'].set_visible(False)
168
- plt.gca().spines['right'].set_visible(False)
169
 
170
- # Add legend for answer rate dots
171
- plt.scatter([], [], color='#22aa22', s=25, label='≥95%')
172
- plt.scatter([], [], color='#cc3333', s=25, label='<95%')
173
- plt.legend(loc='upper right', fontsize=8, framealpha=0.9, title='Answer Rate', title_fontsize=8)
174
-
175
- plt.tight_layout()
176
- plt.subplots_adjust(left=0.25, bottom=0.15)
177
-
178
- # Add copyright at bottom
179
- plt.figtext(0.5, 0.02, f"Copyright (2025) Vectara, Inc. - Plot generated on {datetime.now().strftime('%B %d, %Y')}",
180
- ha='center', fontsize=10)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
181
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
182
  return fig
183
 
184
  # %%
 
3
  import json
4
  from huggingface_hub import snapshot_download
5
  import pandas as pd
6
+ import matplotlib.cm as cm
7
+ from matplotlib.colors import to_hex
8
+ import plotly.graph_objects as go
9
  from datetime import datetime
10
  from sklearn.preprocessing import MinMaxScaler
 
11
 
12
  min_max_scaler = MinMaxScaler()
13
 
 
140
  else: # to the right of the bar, black anyway
141
  return hallucination_percent, 'black'
142
 
143
+ def visualize_leaderboard(df: pd.DataFrame) -> go.Figure:
144
+ """Create interactive horizontal bar chart with striped bars marking low answer rate."""
145
  plot_df = df.head(10).copy()
146
+ plot_df["normalized_hallucination_rate"] = min_max_scaler.fit_transform(
147
+ plot_df[["Hallucination %"]]
148
+ )
149
+ plot_df = plot_df.iloc[::-1] # Reverse for bottom-to-top display
 
 
 
 
 
 
 
 
 
 
150
 
151
+ # Strip org prefix for labels
152
  labels = [name.split("/")[-1] for name in plot_df["LLM"]]
 
 
 
 
 
 
153
 
154
+ # Calculate colors (RdYlGn_r) and patterns (hatched for low AR)
155
+ colors = []
156
+ patterns = []
157
+ for _, row in plot_df.iterrows():
158
+ colors.append(to_hex(cm.RdYlGn_r(row["normalized_hallucination_rate"])))
159
+ patterns.append("/" if row["Answer %"] < 95 else "")
160
+
161
+ # Hover text with full details
162
+ hover_texts = [
163
+ f"<b>{label}</b><br>"
164
+ f"Hallucination Rate: {row['Hallucination %']}%<br>"
165
+ f"Answer Rate: {row['Answer %']}%"
166
+ + (" ✓" if row["Answer %"] >= 95 else " (below 95%)")
167
+ for label, (_, row) in zip(labels, plot_df.iterrows())
168
+ ]
169
+
170
+ fig = go.Figure()
171
+ fig.add_trace(go.Bar(
172
+ y=labels,
173
+ x=plot_df["Hallucination %"],
174
+ orientation='h',
175
+ marker=dict(
176
+ color=colors,
177
+ pattern_shape=patterns,
178
+ pattern_fillmode="overlay",
179
+ line=dict(width=0)
180
+ ),
181
+ text=[f"{val}%" for val in plot_df["Hallucination %"]],
182
+ textposition='outside',
183
+ textfont=dict(size=10, color='black'),
184
+ hovertemplate="%{customdata}<extra></extra>",
185
+ customdata=hover_texts
186
+ ))
187
+
188
+ # Title with copyright
189
+ title_text = (
190
+ f"Grounded Hallucination Rate of Best LLMs · "
191
+ f"© {datetime.now().year} Vectara · Created {datetime.now().strftime('%B %d, %Y')}"
192
+ )
193
 
194
+ fig.update_layout(
195
+ title=dict(text=title_text, font=dict(size=13), x=0.5, xanchor='center'),
196
+ xaxis=dict(title="Hallucination Rate", range=[0, max(plot_df["Hallucination %"]) * 1.15]),
197
+ yaxis=dict(title=""),
198
+ showlegend=False,
199
+ height=400,
200
+ margin=dict(l=180, r=50, t=50, b=40),
201
+ annotations=[
202
+ dict(
203
+ text="Striped = Answer Rate < 95%",
204
+ xref="paper", yref="paper", x=1.0, y=0.98,
205
+ showarrow=False, font=dict(size=10, color="gray"), xanchor="right", yanchor="top"
206
+ )
207
+ ]
208
+ )
209
  return fig
210
 
211
  # %%
app/requirements.txt CHANGED
@@ -3,4 +3,5 @@ requests==2.32.5
3
  pandas==2.2.3
4
  huggingface_hub>=0.20.0
5
  matplotlib==3.10.3
6
- scikit-learn==1.6.1
 
 
3
  pandas==2.2.3
4
  huggingface_hub>=0.20.0
5
  matplotlib==3.10.3
6
+ scikit-learn==1.6.1
7
+ plotly>=5.18.0