openhands openhands committed on
Commit
5f628a6
·
1 Parent(s): dfa8bfc

fix: Add Visualization column to main table (not just benchmark tables)

Browse files

The previous commit only added visualization links to individual benchmark
detail tables. This commit adds the Visualization column to the main front
page table as well.

Changes:
- Add 'visualization' column to record in simple_data_loader.py
- Add 'Visualization' to ending_cols in leaderboard_transformer.py
- Add make_visualization_icons() to format the column in ui_components.py
- Update column widths and datatypes in ui_components.py to include Visualization
- Add tooltip description for the Visualization column in ui_components.py

Co-authored-by: openhands <openhands@all-hands.dev>

leaderboard_transformer.py CHANGED
@@ -814,7 +814,7 @@ class DataTransformer:
814
  # --- 3. Add Columns for Agent Openness ---
815
  base_cols = ["id","Language Model","SDK Version","Source"]
816
  new_cols = ["Openness"]
817
- ending_cols = ["Date", "Logs"]
818
 
819
  # For Overall view, use "Average Cost" and "Average Runtime" (per instance across all benchmarks)
820
  if tag is None or tag == "Overall":
 
814
  # --- 3. Add Columns for Agent Openness ---
815
  base_cols = ["id","Language Model","SDK Version","Source"]
816
  new_cols = ["Openness"]
817
+ ending_cols = ["Date", "Logs", "Visualization"]
818
 
819
  # For Overall view, use "Average Cost" and "Average Runtime" (per instance across all benchmarks)
820
  if tag is None or tag == "Overall":
simple_data_loader.py CHANGED
@@ -239,6 +239,7 @@ class SimpleLeaderboardViewer:
239
  'id': agent_id,
240
  'source': first_record.get('source', ''), # Will become "Source"
241
  'logs': first_record.get('logs', ''), # Will become "Logs"
 
242
  }
243
 
244
  # Add per-dataset scores and costs
 
239
  'id': agent_id,
240
  'source': first_record.get('source', ''), # Will become "Source"
241
  'logs': first_record.get('logs', ''), # Will become "Logs"
242
+ 'visualization': '', # Will become "Visualization" - populated below
243
  }
244
 
245
  # Add per-dataset scores and costs
ui_components.py CHANGED
@@ -170,6 +170,7 @@ def build_descriptions_tooltip_content(table) -> str:
170
  <div class="tooltip-description-item"><b>Information Gathering Cost:</b> Macro-average cost per instance (USD) across Information Gathering benchmarks.</div>
171
  <div class="tooltip-description-item"><b>Categories Attempted:</b> Number of core categories with at least one benchmark attempted (out of 5).</div>
172
  <div class="tooltip-description-item"><b>Logs:</b> View evaluation run logs (e.g., outputs, traces).</div>
 
173
  """
174
  elif table in ["Issue Resolution", "Frontend", "Greenfield", "Testing", "Information Gathering"]:
175
  return f"""
@@ -735,7 +736,7 @@ def create_leaderboard_display(
735
  df_headers = df_display_all.columns.tolist()
736
  df_datatypes = []
737
  for col in df_headers:
738
- if col == "Logs" or "Cost" in col or "Score" in col or "Runtime" in col:
739
  df_datatypes.append("markdown")
740
  elif col in ["SDK Version", "Language Model"]:
741
  df_datatypes.append("html")
@@ -750,7 +751,7 @@ def create_leaderboard_display(
750
  if "Score" in col or "Cost" in col or "Runtime" in col:
751
  num_score_cost_runtime_cols += 1
752
  dynamic_widths = [90] * num_score_cost_runtime_cols
753
- fixed_end_widths = [90, 100, 120] # Categories Attempted, Date, Logs
754
  # 5. Combine all the lists to create the final, fully dynamic list.
755
  final_column_widths = fixed_start_widths + dynamic_widths + fixed_end_widths
756
 
@@ -1294,6 +1295,20 @@ def get_full_leaderboard_data(split: str) -> tuple[pd.DataFrame, dict]:
1294
  parts.append(f'<a href="{url}" title="Download {benchmark} results" target="_blank">⬇️</a>')
1295
  return " ".join(parts) if parts else ""
1296
  pretty_df["Logs"] = pretty_df.index.map(make_download_icons)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1297
 
1298
  if "Source" in pretty_df.columns:
1299
  def format_source_url_to_html(raw_url):
 
170
  <div class="tooltip-description-item"><b>Information Gathering Cost:</b> Macro-average cost per instance (USD) across Information Gathering benchmarks.</div>
171
  <div class="tooltip-description-item"><b>Categories Attempted:</b> Number of core categories with at least one benchmark attempted (out of 5).</div>
172
  <div class="tooltip-description-item"><b>Logs:</b> View evaluation run logs (e.g., outputs, traces).</div>
173
+ <div class="tooltip-description-item"><b>Visualization:</b> View interactive evaluation visualization (when available).</div>
174
  """
175
  elif table in ["Issue Resolution", "Frontend", "Greenfield", "Testing", "Information Gathering"]:
176
  return f"""
 
736
  df_headers = df_display_all.columns.tolist()
737
  df_datatypes = []
738
  for col in df_headers:
739
+ if col in ["Logs", "Visualization"] or "Cost" in col or "Score" in col or "Runtime" in col:
740
  df_datatypes.append("markdown")
741
  elif col in ["SDK Version", "Language Model"]:
742
  df_datatypes.append("html")
 
751
  if "Score" in col or "Cost" in col or "Runtime" in col:
752
  num_score_cost_runtime_cols += 1
753
  dynamic_widths = [90] * num_score_cost_runtime_cols
754
+ fixed_end_widths = [90, 100, 120, 120] # Categories Attempted, Date, Logs, Visualization
755
  # 5. Combine all the lists to create the final, fully dynamic list.
756
  final_column_widths = fixed_start_widths + dynamic_widths + fixed_end_widths
757
 
 
1295
  parts.append(f'<a href="{url}" title="Download {benchmark} results" target="_blank">⬇️</a>')
1296
  return " ".join(parts) if parts else ""
1297
  pretty_df["Logs"] = pretty_df.index.map(make_download_icons)
1298
+
1299
+ if "Visualization" in pretty_df.columns:
1300
+ # Format Visualization column with bar chart icons using raw_df data
1301
+ benchmarks = ['swe-bench', 'swe-bench-multimodal', 'commit0', 'swt-bench', 'gaia']
1302
+ def make_visualization_icons(idx):
1303
+ parts = []
1304
+ for benchmark in benchmarks:
1305
+ viz_col = f"{benchmark} visualization"
1306
+ if viz_col in raw_df.columns:
1307
+ url = raw_df.loc[idx, viz_col] if idx in raw_df.index else None
1308
+ if pd.notna(url) and url != "":
1309
+ parts.append(f'<a href="{url}" title="Visualize {benchmark} results" target="_blank">📊</a>')
1310
+ return " ".join(parts) if parts else ""
1311
+ pretty_df["Visualization"] = pretty_df.index.map(make_visualization_icons)
1312
 
1313
  if "Source" in pretty_df.columns:
1314
  def format_source_url_to_html(raw_url):