openhands openhands committed on
Commit
6737ff3
·
1 Parent(s): af81bcf

UI cleanup and About page updates

Browse files

1. Removed legend box from graph (OpenHands/Pareto/Company Logos)
2. Combined Pareto, lock, and company logo columns into Language Model column
3. Removed dataset structure SVG image from main page
4. Updated Acknowledgements section - shortened AstaBench credit, added links to component benchmarks
5. Updated Citation to reference the OpenHands Index, with authors: Juan Michelini, Simon Rosenberg, Xingyao Wang, Graham Neubig

Co-authored-by: openhands <openhands@all-hands.dev>

Files changed (3) hide show
  1. about.py +15 -17
  2. main_page.py +0 -11
  3. ui_components.py +89 -84
about.py CHANGED
@@ -155,23 +155,21 @@ def build_page():
155
  """
156
  <h2>Acknowledgements</h2>
157
  <p>
158
- The OpenHands Index leaderboard interface and visualization components are adapted from the
159
  <a href="https://huggingface.co/spaces/allenai/asta-bench-leaderboard" target="_blank" class="primary-link-button">AstaBench Leaderboard</a>
160
- developed by the Allen Institute for AI. We thank the AstaBench team for their excellent work in creating
161
- a clear and effective leaderboard design that we have customized for the software engineering domain.
162
  </p>
163
  <p>
164
- Key aspects adapted from AstaBench include:
165
  </p>
166
  <ul class="info-list">
167
- <li>Macro-averaging methodology for computing overall scores from category-level averages</li>
168
- <li>Interactive data visualization and filtering components</li>
169
- <li>Leaderboard UI structure and styling</li>
 
 
 
170
  </ul>
171
- <p>
172
- We have extended and modified this foundation to support software engineering benchmarks and the
173
- specific requirements of evaluating AI coding agents.
174
- </p>
175
  """
176
  )
177
  gr.Markdown("---", elem_classes="divider-line")
@@ -181,14 +179,14 @@ def build_page():
181
  """
182
  <h2>Citation</h2>
183
  <p>
184
- If you use OpenHands or reference the OpenHands Index in your work, please cite:
185
  </p>
186
  <pre class="citation-block">
187
- @misc{openhands2024,
188
- title={OpenHands: An Open Platform for AI Software Developers as Generalist Agents},
189
- author={OpenHands Team},
190
- year={2024},
191
- howpublished={https://github.com/OpenHands/OpenHands}
192
  }</pre>
193
  """
194
  )
 
155
  """
156
  <h2>Acknowledgements</h2>
157
  <p>
158
+ The OpenHands Index leaderboard interface is adapted from the
159
  <a href="https://huggingface.co/spaces/allenai/asta-bench-leaderboard" target="_blank" class="primary-link-button">AstaBench Leaderboard</a>
160
+ developed by the Allen Institute for AI.
 
161
  </p>
162
  <p>
163
+ We thank the teams behind the component benchmarks used in OpenHands Index:
164
  </p>
165
  <ul class="info-list">
166
+ <li><a href="https://www.swebench.com/" target="_blank">SWE-bench</a> - Princeton NLP Group</li>
167
+ <li><a href="https://github.com/multi-swe-bench/multi-swe-bench" target="_blank">Multi-SWE-bench</a> - Multi-SWE-bench Team</li>
168
+ <li><a href="https://github.com/OpenHands/SWE-bench-multimodal" target="_blank">SWE-bench Multimodal</a> - OpenHands Team</li>
169
+ <li><a href="https://github.com/logic-star-ai/swt-bench" target="_blank">SWT-bench</a> - Logic Star AI</li>
170
+ <li><a href="https://github.com/commit-0/commit0" target="_blank">Commit0</a> - Commit0 Team</li>
171
+ <li><a href="https://huggingface.co/gaia-benchmark" target="_blank">GAIA</a> - Hugging Face & Meta AI</li>
172
  </ul>
 
 
 
 
173
  """
174
  )
175
  gr.Markdown("---", elem_classes="divider-line")
 
179
  """
180
  <h2>Citation</h2>
181
  <p>
182
+ If you reference the OpenHands Index in your work, please cite:
183
  </p>
184
  <pre class="citation-block">
185
+ @misc{openhandsindex2025,
186
+ title={OpenHands Index: A Comprehensive Leaderboard for AI Coding Agents},
187
+ author={Juan Michelini and Simon Rosenberg and Xingyao Wang and Graham Neubig},
188
+ year={2025},
189
+ howpublished={https://huggingface.co/spaces/OpenHands/openhands-index}
190
  }</pre>
191
  """
192
  )
main_page.py CHANGED
@@ -20,17 +20,6 @@ def build_page():
20
  with gr.Column(scale=1):
21
  gr.HTML(INTRO_PARAGRAPH, elem_id="intro-paragraph")
22
 
23
- with gr.Column(scale=1):
24
- gr.Image(
25
- value="assets/overall.svg",
26
- show_label=False,
27
- interactive=False,
28
- show_download_button=False,
29
- show_fullscreen_button=False,
30
- show_share_button=False,
31
- elem_id="diagram-image"
32
- )
33
-
34
  # --- Leaderboard Display Section ---
35
  gr.Markdown("---")
36
  CATEGORY_NAME = "Overall"
 
20
  with gr.Column(scale=1):
21
  gr.HTML(INTRO_PARAGRAPH, elem_id="intro-paragraph")
22
 
 
 
 
 
 
 
 
 
 
 
 
23
  # --- Leaderboard Display Section ---
24
  gr.Markdown("---")
25
  CATEGORY_NAME = "Overall"
ui_components.py CHANGED
@@ -442,32 +442,13 @@ def create_leaderboard_display(
442
  """Prepare a DataFrame for display with all formatting applied."""
443
  df_display = df_view.copy()
444
 
 
445
  pareto_df = get_pareto_df(df_display)
446
  trophy_uri = get_svg_as_data_uri("assets/trophy.svg")
447
- trophy_icon_html = f'<img src="{trophy_uri}" alt="On Pareto Frontier" title="On Pareto Frontier" style="width:25px; height:25px;">'
448
  if not pareto_df.empty and 'id' in pareto_df.columns:
449
  pareto_agent_names = pareto_df['id'].tolist()
450
  else:
451
  pareto_agent_names = []
452
- df_display['Pareto'] = df_display.apply(
453
- lambda row: trophy_icon_html if row['id'] in pareto_agent_names else '',
454
- axis=1
455
- )
456
-
457
- def get_openness_icon_html(row):
458
- openness_val = row.get('Openness', '')
459
- # Use custom lock SVG icons: blue open lock, red closed lock
460
- if openness_val in [aliases.CANONICAL_OPENNESS_OPEN, 'Open', 'Open Source', 'Open Source + Open Weights']:
461
- uri = get_svg_as_data_uri("assets/lock-open.svg")
462
- return f'<img src="{uri}" alt="Open" title="Open source model" style="width:20px; height:20px;">'
463
- else:
464
- uri = get_svg_as_data_uri("assets/lock-closed.svg")
465
- return f'<img src="{uri}" alt="Closed" title="Closed source model" style="width:20px; height:20px;">'
466
-
467
- df_display['Icon'] = df_display.apply(get_openness_icon_html, axis=1)
468
-
469
- # Add company logo column based on the Language Model
470
- df_display['Company'] = df_display['Language Model'].apply(get_company_logo_html)
471
 
472
  for col in df_display.columns:
473
  if "Cost" in col:
@@ -477,32 +458,56 @@ def create_leaderboard_display(
477
  if "Score" in col:
478
  df_display = format_score_column(df_display, col)
479
 
 
480
  df_display['Language Model'] = df_display['Language Model'].apply(clean_llm_base_list)
481
- df_display['Language Model'] = df_display['Language Model'].apply(format_llm_base_with_html)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
482
 
483
  if 'Source' in df_display.columns:
484
  df_display['SDK Version'] = df_display.apply(
485
  lambda row: f"{row['SDK Version']} {row['Source']}" if pd.notna(row['Source']) and row['Source'] else row['SDK Version'],
486
  axis=1
487
  )
488
-
489
- all_cols = df_display.columns.tolist()
490
- # Move Company logo column after Icon
491
- if 'Company' in all_cols:
492
- all_cols.insert(0, all_cols.pop(all_cols.index('Company')))
493
- all_cols.insert(0, all_cols.pop(all_cols.index('Icon')))
494
- all_cols.insert(0, all_cols.pop(all_cols.index('Pareto')))
495
- df_display = df_display[all_cols]
496
 
497
  columns_to_drop = ['id', 'Openness', 'Agent Tooling', 'Source']
498
  df_display = df_display.drop(columns=columns_to_drop, errors='ignore')
499
-
500
- header_rename_map = {
501
- "Pareto": "",
502
- "Icon": "",
503
- "Company": "",
504
- }
505
- df_display = df_display.rename(columns=header_rename_map)
506
 
507
  return df_display
508
 
@@ -528,13 +533,13 @@ def create_leaderboard_display(
528
  for col in df_headers:
529
  if col == "Logs" or "Cost" in col or "Score" in col:
530
  df_datatypes.append("markdown")
531
- elif col in ["SDK Version","Language Model", ""]: # "" for renamed Pareto/Icon/Company columns
532
  df_datatypes.append("html")
533
  else:
534
  df_datatypes.append("str")
535
  # Dynamically set widths for the DataFrame columns
536
- # Order: Pareto, Icon, Company, Language Model, SDK Version, ...
537
- fixed_start_widths = [40, 40, 40, 200, 100, 200]
538
  num_score_cost_cols = 0
539
  remaining_headers = df_headers[len(fixed_start_widths):]
540
  for col in remaining_headers:
@@ -545,15 +550,11 @@ def create_leaderboard_display(
545
  # 5. Combine all the lists to create the final, fully dynamic list.
546
  final_column_widths = fixed_start_widths + dynamic_widths + fixed_end_widths
547
 
548
- with gr.Row():
549
- with gr.Column(scale=3):
550
- plot_component = gr.Plot(
551
- value=scatter_plot,
552
- show_label=False,
553
- )
554
- gr.Markdown(value=SCATTER_DISCLAIMER, elem_id="scatter-disclaimer")
555
- with gr.Column(scale=1):
556
- gr.HTML(value=plot_legend_html)
557
 
558
  # Put table and key into an accordion
559
  with gr.Accordion("Show / Hide Table View", open=True, elem_id="leaderboard-accordion"):
@@ -671,35 +672,53 @@ def create_benchmark_details_display(
671
  pareto_df = get_pareto_df(benchmark_table_df)
672
  # Get the list of agents on the frontier. We'll use this list later.
673
  trophy_uri = get_svg_as_data_uri("assets/trophy.svg")
674
- trophy_icon_html = f'<img src="{trophy_uri}" alt="On Pareto Frontier" title="On Pareto Frontier" style="width:25px; height:25px;">'
675
  if not pareto_df.empty and 'id' in pareto_df.columns:
676
  pareto_agent_names = pareto_df['id'].tolist()
677
  else:
678
  pareto_agent_names = []
679
- benchmark_table_df['Pareto'] = benchmark_table_df.apply(
680
- lambda row: trophy_icon_html if row['id'] in pareto_agent_names else '',
681
- axis=1
682
- )
683
 
684
- # Create simple openness icons using custom SVG lock icons
685
- def get_openness_icon_html(row):
 
 
 
 
 
 
 
 
 
 
686
  openness_val = row.get('Openness', '')
687
- # Use custom lock SVG icons: blue open lock, red closed lock
688
  if openness_val in [aliases.CANONICAL_OPENNESS_OPEN, 'Open', 'Open Source', 'Open Source + Open Weights']:
689
- uri = get_svg_as_data_uri("assets/lock-open.svg")
690
- return f'<img src="{uri}" alt="Open" title="Open source model" style="width:20px; height:20px;">'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
691
  else:
692
- uri = get_svg_as_data_uri("assets/lock-closed.svg")
693
- return f'<img src="{uri}" alt="Closed" title="Closed source model" style="width:20px; height:20px;">'
 
694
 
695
- benchmark_table_df['Icon'] = benchmark_table_df.apply(get_openness_icon_html, axis=1)
696
 
697
- # Add company logo column based on the Language Model
698
- benchmark_table_df['Company'] = benchmark_table_df['Language Model'].apply(get_company_logo_html)
699
-
700
- #Make pretty and format the Language Model column
701
- benchmark_table_df['Language Model'] = benchmark_table_df['Language Model'].apply(clean_llm_base_list)
702
- benchmark_table_df['Language Model'] = benchmark_table_df['Language Model'].apply(format_llm_base_with_html)
703
  # append the repro url to the end of the SDK Version
704
  if 'Source' in benchmark_table_df.columns:
705
  benchmark_table_df['SDK Version'] = benchmark_table_df.apply(
@@ -728,9 +747,6 @@ def create_benchmark_details_display(
728
  benchmark_table_df = format_cost_column(benchmark_table_df, benchmark_cost_col)
729
  benchmark_table_df = format_score_column(benchmark_table_df, benchmark_score_col)
730
  desired_cols_in_order = [
731
- 'Pareto',
732
- 'Icon',
733
- 'Company',
734
  'Language Model',
735
  'SDK Version',
736
  'Attempted Benchmark',
@@ -748,13 +764,6 @@ def create_benchmark_details_display(
748
  benchmark_score_col: 'Score',
749
  benchmark_cost_col: 'Cost',
750
  }, inplace=True)
751
- # Remove Pareto, Icon, and Company column headers (rename to empty string)
752
- header_rename_map = {
753
- "Pareto": "",
754
- "Icon": "",
755
- "Company": "",
756
- }
757
- benchmark_table_df = benchmark_table_df.rename(columns=header_rename_map)
758
 
759
  # Now get headers from the renamed dataframe
760
  df_headers = benchmark_table_df.columns.tolist()
@@ -762,7 +771,7 @@ def create_benchmark_details_display(
762
  for col in df_headers:
763
  if "Logs" in col or "Cost" in col or "Score" in col:
764
  df_datatypes.append("markdown")
765
- elif col in ["SDK Version", "Language Model", ""]: # "" for renamed Pareto/Icon/Company columns
766
  df_datatypes.append("html")
767
  else:
768
  df_datatypes.append("str")
@@ -773,12 +782,8 @@ def create_benchmark_details_display(
773
  agent_col="SDK Version",
774
  name=benchmark_name
775
  )
776
- with gr.Row():
777
- with gr.Column(scale=3):
778
- gr.Plot(value=benchmark_plot, show_label=False)
779
- gr.Markdown(value=SCATTER_DISCLAIMER, elem_id="scatter-disclaimer")
780
- with gr.Column(scale=1):
781
- gr.HTML(value=plot_legend_html)
782
 
783
  # Put table and key into an accordion
784
  with gr.Accordion("Show / Hide Table View", open=True, elem_id="leaderboard-accordion"):
 
442
  """Prepare a DataFrame for display with all formatting applied."""
443
  df_display = df_view.copy()
444
 
445
+ # Get Pareto frontier info
446
  pareto_df = get_pareto_df(df_display)
447
  trophy_uri = get_svg_as_data_uri("assets/trophy.svg")
 
448
  if not pareto_df.empty and 'id' in pareto_df.columns:
449
  pareto_agent_names = pareto_df['id'].tolist()
450
  else:
451
  pareto_agent_names = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
452
 
453
  for col in df_display.columns:
454
  if "Cost" in col:
 
458
  if "Score" in col:
459
  df_display = format_score_column(df_display, col)
460
 
461
+ # Clean the Language Model column first
462
  df_display['Language Model'] = df_display['Language Model'].apply(clean_llm_base_list)
463
+
464
+ # Now combine icons with Language Model column
465
+ def format_language_model_with_icons(row):
466
+ icons_html = ''
467
+
468
+ # Add Pareto trophy if on frontier
469
+ if row['id'] in pareto_agent_names:
470
+ icons_html += f'<img src="{trophy_uri}" alt="On Pareto Frontier" title="On Pareto Frontier" style="width:18px; height:18px; vertical-align:middle; margin-right:4px;">'
471
+
472
+ # Add openness lock icon
473
+ openness_val = row.get('Openness', '')
474
+ if openness_val in [aliases.CANONICAL_OPENNESS_OPEN, 'Open', 'Open Source', 'Open Source + Open Weights']:
475
+ lock_uri = get_svg_as_data_uri("assets/lock-open.svg")
476
+ icons_html += f'<img src="{lock_uri}" alt="Open" title="Open source model" style="width:16px; height:16px; vertical-align:middle; margin-right:4px;">'
477
+ else:
478
+ lock_uri = get_svg_as_data_uri("assets/lock-closed.svg")
479
+ icons_html += f'<img src="{lock_uri}" alt="Closed" title="Closed source model" style="width:16px; height:16px; vertical-align:middle; margin-right:4px;">'
480
+
481
+ # Add company logo
482
+ company_html = get_company_logo_html(row['Language Model'])
483
+ if company_html:
484
+ icons_html += company_html.replace('style="', 'style="vertical-align:middle; margin-right:6px; ')
485
+
486
+ # Format the model name
487
+ model_name = row['Language Model']
488
+ if isinstance(model_name, list):
489
+ if len(model_name) > 1:
490
+ tooltip_text = "\\n".join(map(str, model_name))
491
+ model_text = f'<span class="tooltip-icon cell-tooltip-icon" style="cursor: help;" data-tooltip="{tooltip_text}">{model_name[0]} (+ {len(model_name) - 1}) ⓘ</span>'
492
+ elif len(model_name) == 1:
493
+ model_text = model_name[0]
494
+ else:
495
+ model_text = str(model_name)
496
+ else:
497
+ model_text = str(model_name)
498
+
499
+ return f'{icons_html}{model_text}'
500
+
501
+ df_display['Language Model'] = df_display.apply(format_language_model_with_icons, axis=1)
502
 
503
  if 'Source' in df_display.columns:
504
  df_display['SDK Version'] = df_display.apply(
505
  lambda row: f"{row['SDK Version']} {row['Source']}" if pd.notna(row['Source']) and row['Source'] else row['SDK Version'],
506
  axis=1
507
  )
 
 
 
 
 
 
 
 
508
 
509
  columns_to_drop = ['id', 'Openness', 'Agent Tooling', 'Source']
510
  df_display = df_display.drop(columns=columns_to_drop, errors='ignore')
 
 
 
 
 
 
 
511
 
512
  return df_display
513
 
 
533
  for col in df_headers:
534
  if col == "Logs" or "Cost" in col or "Score" in col:
535
  df_datatypes.append("markdown")
536
+ elif col in ["SDK Version", "Language Model"]:
537
  df_datatypes.append("html")
538
  else:
539
  df_datatypes.append("str")
540
  # Dynamically set widths for the DataFrame columns
541
+ # Order: Language Model, SDK Version, Average Score, Average Cost, ...
542
+ fixed_start_widths = [280, 100, 100] # Language Model (with icons), SDK Version, Average Score
543
  num_score_cost_cols = 0
544
  remaining_headers = df_headers[len(fixed_start_widths):]
545
  for col in remaining_headers:
 
550
  # 5. Combine all the lists to create the final, fully dynamic list.
551
  final_column_widths = fixed_start_widths + dynamic_widths + fixed_end_widths
552
 
553
+ plot_component = gr.Plot(
554
+ value=scatter_plot,
555
+ show_label=False,
556
+ )
557
+ gr.Markdown(value=SCATTER_DISCLAIMER, elem_id="scatter-disclaimer")
 
 
 
 
558
 
559
  # Put table and key into an accordion
560
  with gr.Accordion("Show / Hide Table View", open=True, elem_id="leaderboard-accordion"):
 
672
  pareto_df = get_pareto_df(benchmark_table_df)
673
  # Get the list of agents on the frontier. We'll use this list later.
674
  trophy_uri = get_svg_as_data_uri("assets/trophy.svg")
 
675
  if not pareto_df.empty and 'id' in pareto_df.columns:
676
  pareto_agent_names = pareto_df['id'].tolist()
677
  else:
678
  pareto_agent_names = []
 
 
 
 
679
 
680
+ # Clean the Language Model column first
681
+ benchmark_table_df['Language Model'] = benchmark_table_df['Language Model'].apply(clean_llm_base_list)
682
+
683
+ # Combine icons with Language Model column
684
+ def format_language_model_with_icons(row):
685
+ icons_html = ''
686
+
687
+ # Add Pareto trophy if on frontier
688
+ if row['id'] in pareto_agent_names:
689
+ icons_html += f'<img src="{trophy_uri}" alt="On Pareto Frontier" title="On Pareto Frontier" style="width:18px; height:18px; vertical-align:middle; margin-right:4px;">'
690
+
691
+ # Add openness lock icon
692
  openness_val = row.get('Openness', '')
 
693
  if openness_val in [aliases.CANONICAL_OPENNESS_OPEN, 'Open', 'Open Source', 'Open Source + Open Weights']:
694
+ lock_uri = get_svg_as_data_uri("assets/lock-open.svg")
695
+ icons_html += f'<img src="{lock_uri}" alt="Open" title="Open source model" style="width:16px; height:16px; vertical-align:middle; margin-right:4px;">'
696
+ else:
697
+ lock_uri = get_svg_as_data_uri("assets/lock-closed.svg")
698
+ icons_html += f'<img src="{lock_uri}" alt="Closed" title="Closed source model" style="width:16px; height:16px; vertical-align:middle; margin-right:4px;">'
699
+
700
+ # Add company logo
701
+ company_html = get_company_logo_html(row['Language Model'])
702
+ if company_html:
703
+ icons_html += company_html.replace('style="', 'style="vertical-align:middle; margin-right:6px; ')
704
+
705
+ # Format the model name
706
+ model_name = row['Language Model']
707
+ if isinstance(model_name, list):
708
+ if len(model_name) > 1:
709
+ tooltip_text = "\\n".join(map(str, model_name))
710
+ model_text = f'<span class="tooltip-icon cell-tooltip-icon" style="cursor: help;" data-tooltip="{tooltip_text}">{model_name[0]} (+ {len(model_name) - 1}) ⓘ</span>'
711
+ elif len(model_name) == 1:
712
+ model_text = model_name[0]
713
+ else:
714
+ model_text = str(model_name)
715
  else:
716
+ model_text = str(model_name)
717
+
718
+ return f'{icons_html}{model_text}'
719
 
720
+ benchmark_table_df['Language Model'] = benchmark_table_df.apply(format_language_model_with_icons, axis=1)
721
 
 
 
 
 
 
 
722
  # append the repro url to the end of the SDK Version
723
  if 'Source' in benchmark_table_df.columns:
724
  benchmark_table_df['SDK Version'] = benchmark_table_df.apply(
 
747
  benchmark_table_df = format_cost_column(benchmark_table_df, benchmark_cost_col)
748
  benchmark_table_df = format_score_column(benchmark_table_df, benchmark_score_col)
749
  desired_cols_in_order = [
 
 
 
750
  'Language Model',
751
  'SDK Version',
752
  'Attempted Benchmark',
 
764
  benchmark_score_col: 'Score',
765
  benchmark_cost_col: 'Cost',
766
  }, inplace=True)
 
 
 
 
 
 
 
767
 
768
  # Now get headers from the renamed dataframe
769
  df_headers = benchmark_table_df.columns.tolist()
 
771
  for col in df_headers:
772
  if "Logs" in col or "Cost" in col or "Score" in col:
773
  df_datatypes.append("markdown")
774
+ elif col in ["SDK Version", "Language Model"]:
775
  df_datatypes.append("html")
776
  else:
777
  df_datatypes.append("str")
 
782
  agent_col="SDK Version",
783
  name=benchmark_name
784
  )
785
+ gr.Plot(value=benchmark_plot, show_label=False)
786
+ gr.Markdown(value=SCATTER_DISCLAIMER, elem_id="scatter-disclaimer")
 
 
 
 
787
 
788
  # Put table and key into an accordion
789
  with gr.Accordion("Show / Hide Table View", open=True, elem_id="leaderboard-accordion"):