openhands openhands committed on
Commit
ed6e90d
·
1 Parent(s): a87403b

Add 'Mark systems by' selector for scatter plot icons (Company/Openness/Country)

Browse files

- Add MARK_BY_CHOICES and MARK_BY_DEFAULT constants
- Add country flag SVGs (US, China, France)
- Add COUNTRY_FLAG_MAP and MODEL_COUNTRY_MAP for country detection
- Add get_openness_icon() and get_country_from_model() functions
- Add get_marker_icon() to select appropriate icon based on mark_by setting
- Modify _plot_scatter_plotly() to accept mark_by parameter
- Add mark_by_dropdown to UI in ui_components.py
- Update update_display() to regenerate plots dynamically based on mark_by selection

Co-authored-by: openhands <openhands@all-hands.dev>

assets/flag-cn.svg ADDED
assets/flag-fr.svg ADDED
assets/flag-us.svg ADDED
constants.py CHANGED
@@ -3,3 +3,10 @@
3
  # Font settings
4
  FONT_FAMILY = "Arial, sans-serif"
5
  FONT_FAMILY_SHORT = "Arial" # For places that don't accept fallbacks
 
 
 
 
 
 
 
 
3
  # Font settings
4
  FONT_FAMILY = "Arial, sans-serif"
5
  FONT_FAMILY_SHORT = "Arial" # For places that don't accept fallbacks
6
+
7
+ # Marker options for plot icons
8
+ MARK_BY_COMPANY = "Company"
9
+ MARK_BY_OPENNESS = "Openness"
10
+ MARK_BY_COUNTRY = "Country"
11
+ MARK_BY_CHOICES = [MARK_BY_COMPANY, MARK_BY_OPENNESS, MARK_BY_COUNTRY]
12
+ MARK_BY_DEFAULT = MARK_BY_COMPANY
leaderboard_transformer.py CHANGED
@@ -40,6 +40,37 @@ COMPANY_LOGO_MAP = {
40
  "minimax": {"path": "assets/logo-minimax.svg", "name": "MiniMax"},
41
  }
42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  # OpenHands branding constants
44
  OPENHANDS_LOGO_PATH_LIGHT = "assets/openhands_logo_color_forwhite.png"
45
  OPENHANDS_LOGO_PATH_DARK = "assets/openhands_logo_color_forblack.png"
@@ -150,6 +181,66 @@ def get_company_from_model(model_name: str) -> dict:
150
 
151
  return {"path": "assets/logo-unknown.svg", "name": "Unknown"}
152
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
  INFORMAL_TO_FORMAL_NAME_MAP = {
154
  # Short Names
155
  "lit": "Literature Understanding",
@@ -489,8 +580,12 @@ def _plot_scatter_plotly(
489
  y: str,
490
  agent_col: str = 'Agent',
491
  name: Optional[str] = None,
492
- plot_type: str = 'cost' # 'cost' or 'runtime'
 
493
  ) -> go.Figure:
 
 
 
494
 
495
  # --- Section 1: Define Mappings ---
496
  # Map openness to colors (simplified: open vs closed)
@@ -697,8 +792,9 @@ def _plot_scatter_plotly(
697
 
698
  for _, row in data_plot.iterrows():
699
  model_name = row.get('Language Model', '')
700
- company_info = get_company_from_model(model_name)
701
- logo_path = company_info['path']
 
702
 
703
  # Read the SVG file and encode as base64 data URI
704
  if os.path.exists(logo_path):
 
40
  "minimax": {"path": "assets/logo-minimax.svg", "name": "MiniMax"},
41
  }
42
 
43
+ # Openness icon mapping
44
+ OPENNESS_ICON_MAP = {
45
+ "open": {"path": "assets/lock-open.svg", "name": "Open"},
46
+ "closed": {"path": "assets/lock-closed.svg", "name": "Closed"},
47
+ }
48
+
49
+ # Country flag mapping - maps country codes to flag asset paths and display names
50
+ COUNTRY_FLAG_MAP = {
51
+ "us": {"path": "assets/flag-us.svg", "name": "United States"},
52
+ "cn": {"path": "assets/flag-cn.svg", "name": "China"},
53
+ "fr": {"path": "assets/flag-fr.svg", "name": "France"},
54
+ }
55
+
56
+ # Model to country mapping (based on company headquarters)
57
+ MODEL_COUNTRY_MAP = {
58
+ # US companies
59
+ "anthropic": "us", "claude": "us",
60
+ "openai": "us", "gpt": "us", "o1": "us", "o3": "us",
61
+ "google": "us", "gemini": "us", "gemma": "us",
62
+ "meta": "us", "llama": "us",
63
+ "xai": "us", "grok": "us",
64
+ "cohere": "us", "command": "us",
65
+ # China companies
66
+ "deepseek": "cn",
67
+ "qwen": "cn", "alibaba": "cn",
68
+ "kimi": "cn", "moonshot": "cn",
69
+ "minimax": "cn",
70
+ # France companies
71
+ "mistral": "fr", "mixtral": "fr", "codestral": "fr",
72
+ }
73
+
74
  # OpenHands branding constants
75
  OPENHANDS_LOGO_PATH_LIGHT = "assets/openhands_logo_color_forwhite.png"
76
  OPENHANDS_LOGO_PATH_DARK = "assets/openhands_logo_color_forblack.png"
 
181
 
182
  return {"path": "assets/logo-unknown.svg", "name": "Unknown"}
183
 
184
+
185
def get_openness_icon(openness: str) -> dict:
    """
    Gets the openness icon info (path and name) from an openness value.

    The value is stringified, stripped of surrounding whitespace and
    lower-cased before it is looked up in OPENNESS_ICON_MAP, so values such
    as "Open", " open " and "OPEN" all resolve to the open icon.

    Args:
        openness: The openness value (expected to be "open" or "closed").

    Returns:
        dict with 'path' and 'name' keys. The closed icon is returned for
        empty/falsy input and for any value that is not a key of
        OPENNESS_ICON_MAP.
    """
    closed_icon = OPENNESS_ICON_MAP["closed"]
    if not openness:
        return closed_icon

    # Strip before matching: stray whitespace would otherwise silently
    # classify an open model as closed.
    normalized = str(openness).strip().lower()
    return OPENNESS_ICON_MAP.get(normalized, closed_icon)
198
+
199
+
200
def get_country_from_model(model_name: str) -> dict:
    """
    Gets the country flag info (path and name) from a model name.

    The lower-cased model name is searched for every pattern in
    MODEL_COUNTRY_MAP. When several patterns occur, the one that appears
    earliest in the name wins, so the leading vendor/family token decides
    the country rather than the declaration order of the map. For example
    "deepseek-r1-distill-llama-70b" contains both "deepseek" and "llama";
    it resolves to China via the leading "deepseek" instead of to the US
    via "llama".

    Args:
        model_name: The model name. A list (or tuple) of names is also
            accepted, in which case only the first entry is used.

    Returns:
        dict with 'path' and 'name' keys. The US flag is returned as the
        default when the name is empty or matches no known pattern.
    """
    default_flag = COUNTRY_FLAG_MAP["us"]

    # Handle a list of models - use the first one
    if isinstance(model_name, (list, tuple)):
        model_name = model_name[0] if model_name else ""

    if not model_name:
        return default_flag

    model_lower = str(model_name).lower()

    # Collect (position, country_code) for every pattern present in the name.
    matches = [
        (model_lower.find(pattern), country_code)
        for pattern, country_code in MODEL_COUNTRY_MAP.items()
        if pattern in model_lower
    ]
    if not matches:
        return default_flag

    # Leftmost occurrence wins; ties keep the map's declaration order
    # because min() returns the first minimal element.
    _, country_code = min(matches, key=lambda match: match[0])
    return COUNTRY_FLAG_MAP.get(country_code, default_flag)
220
+
221
+
222
def get_marker_icon(model_name: str, openness: str, mark_by: str) -> dict:
    """
    Picks the scatter-plot marker icon for one entry according to mark_by.

    Args:
        model_name: The model name, used for the company and country lookups
        openness: The openness value, used for the openness lookup
        mark_by: One of the MARK_BY_* constants ("Company", "Openness",
            "Country"); any other value is treated like "Company"

    Returns:
        dict with 'path' and 'name' keys
    """
    # Imported at call time, as in the rest of this module's plotting code.
    from constants import MARK_BY_COMPANY, MARK_BY_OPENNESS, MARK_BY_COUNTRY

    if mark_by == MARK_BY_COUNTRY:
        return get_country_from_model(model_name)

    if mark_by == MARK_BY_OPENNESS:
        return get_openness_icon(openness)

    # MARK_BY_COMPANY and every unrecognised value fall back to the company logo.
    return get_company_from_model(model_name)
242
+
243
+
244
  INFORMAL_TO_FORMAL_NAME_MAP = {
245
  # Short Names
246
  "lit": "Literature Understanding",
 
580
  y: str,
581
  agent_col: str = 'Agent',
582
  name: Optional[str] = None,
583
+ plot_type: str = 'cost', # 'cost' or 'runtime'
584
+ mark_by: Optional[str] = None # 'Company', 'Openness', or 'Country'
585
  ) -> go.Figure:
586
+ from constants import MARK_BY_DEFAULT
587
+ if mark_by is None:
588
+ mark_by = MARK_BY_DEFAULT
589
 
590
  # --- Section 1: Define Mappings ---
591
  # Map openness to colors (simplified: open vs closed)
 
792
 
793
  for _, row in data_plot.iterrows():
794
  model_name = row.get('Language Model', '')
795
+ openness = row.get('Openness', '')
796
+ marker_info = get_marker_icon(model_name, openness, mark_by)
797
+ logo_path = marker_info['path']
798
 
799
  # Read the SVG file and encode as base64 data URI
800
  if os.path.exists(logo_path):
main_page.py CHANGED
@@ -36,8 +36,8 @@ def build_page():
36
 
37
  test_df, test_tag_map = get_full_leaderboard_data("test")
38
  if not test_df.empty:
39
- # Get the checkbox returned from create_leaderboard_display
40
- show_open_only_checkbox = create_leaderboard_display(
41
  full_df=test_df,
42
  tag_map=test_tag_map,
43
  category_name=CATEGORY_NAME,
 
36
 
37
  test_df, test_tag_map = get_full_leaderboard_data("test")
38
  if not test_df.empty:
39
+ # Get the checkbox and dropdown returned from create_leaderboard_display
40
+ show_open_only_checkbox, mark_by_dropdown = create_leaderboard_display(
41
  full_df=test_df,
42
  tag_map=test_tag_map,
43
  category_name=CATEGORY_NAME,
ui_components.py CHANGED
@@ -8,6 +8,7 @@ import re
8
  from huggingface_hub import HfApi
9
 
10
  import aliases
 
11
  from simple_data_loader import SimpleLeaderboardViewer
12
  from leaderboard_transformer import (
13
  DataTransformer,
@@ -688,25 +689,27 @@ def create_leaderboard_display(
688
  primary_runtime_col = f"{category_name} Runtime"
689
 
690
  # Function to create cost/performance scatter plot from data
691
- def create_cost_scatter_plot(df_data):
692
  return _plot_scatter_plotly(
693
  data=df_data,
694
  x=primary_cost_col if primary_cost_col in df_data.columns else None,
695
  y=primary_score_col if primary_score_col in df_data.columns else "Average Score",
696
  agent_col="SDK Version",
697
  name=category_name,
698
- plot_type='cost'
 
699
  )
700
 
701
  # Function to create runtime/performance scatter plot from data
702
- def create_runtime_scatter_plot(df_data):
703
  return _plot_scatter_plotly(
704
  data=df_data,
705
  x=primary_runtime_col if primary_runtime_col in df_data.columns else None,
706
  y=primary_score_col if primary_score_col in df_data.columns else "Average Score",
707
  agent_col="SDK Version",
708
  name=category_name,
709
- plot_type='runtime'
 
710
  )
711
 
712
  # Create initial cost scatter plots for all filter combinations
@@ -751,26 +754,36 @@ def create_leaderboard_display(
751
  num_open = len(df_display_open)
752
  num_closed = num_total - num_open
753
 
754
- # Add toggle checkboxes ABOVE the plot
755
- if has_complete_entries:
756
- show_incomplete_checkbox = gr.Checkbox(
757
- label=f"Show incomplete entries ({num_incomplete} entries with fewer than 5 categories)",
758
- value=False,
759
- elem_id="show-incomplete-toggle"
760
- )
761
- else:
762
- show_incomplete_checkbox = None
763
- gr.Markdown(f"*No entries with all 5 categories completed yet. Showing all {num_total} entries.*")
764
-
765
- # Add checkbox for open models only (always show this if there are open models)
766
- if num_open > 0 and num_closed > 0:
767
- show_open_only_checkbox = gr.Checkbox(
768
- label=f"Show only open models ({num_open} open, {num_closed} closed)",
769
- value=False,
770
- elem_id="show-open-only-toggle"
771
- )
772
- else:
773
- show_open_only_checkbox = None
 
 
 
 
 
 
 
 
 
 
774
 
775
  # Plot components - show complete entries by default if available
776
  # Cost/Performance plot
@@ -805,46 +818,46 @@ def create_leaderboard_display(
805
  elem_id="main-leaderboard"
806
  )
807
 
808
- # Update function for filters - handles both checkboxes
809
- def update_display(show_incomplete, show_open_only):
810
- # Determine which data to show based on checkbox states
811
  if show_open_only:
812
- if show_incomplete:
813
- return df_display_open, cost_scatter_open, runtime_scatter_open
814
- else:
815
- return df_display_complete_open, cost_scatter_complete_open, runtime_scatter_complete_open
816
  else:
817
- if show_incomplete:
818
- return df_display_all, cost_scatter_all, runtime_scatter_all
819
- else:
820
- return df_display_complete, cost_scatter_complete, runtime_scatter_complete
 
 
 
821
 
822
- # Connect both checkboxes to the update function
823
- checkbox_inputs = [show_incomplete_checkbox]
 
 
 
 
 
 
 
 
 
 
 
 
824
  if show_open_only_checkbox is not None:
825
- checkbox_inputs.append(show_open_only_checkbox)
826
- show_incomplete_checkbox.change(
827
- fn=update_display,
828
- inputs=checkbox_inputs,
829
- outputs=[dataframe_component, cost_plot_component, runtime_plot_component]
830
- )
831
  show_open_only_checkbox.change(
832
  fn=update_display,
833
- inputs=checkbox_inputs,
834
- outputs=[dataframe_component, cost_plot_component, runtime_plot_component]
835
- )
836
- else:
837
- # No open only checkbox, use simplified version
838
- def update_display_simple(show_incomplete):
839
- if show_incomplete:
840
- return df_display_all, cost_scatter_all, runtime_scatter_all
841
- else:
842
- return df_display_complete, cost_scatter_complete, runtime_scatter_complete
843
- show_incomplete_checkbox.change(
844
- fn=update_display_simple,
845
- inputs=[show_incomplete_checkbox],
846
  outputs=[dataframe_component, cost_plot_component, runtime_plot_component]
847
  )
 
 
 
 
 
848
  else:
849
  dataframe_component = gr.DataFrame(
850
  headers=df_headers,
@@ -858,19 +871,36 @@ def create_leaderboard_display(
858
  elem_id="main-leaderboard"
859
  )
860
 
861
- # Even without incomplete checkbox, we may have open_only checkbox
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
862
  if show_open_only_checkbox is not None:
863
- def update_display_open_only(show_open_only):
864
- if show_open_only:
865
- return df_display_open, cost_scatter_open, runtime_scatter_open
866
- else:
867
- return df_display_all, cost_scatter_all, runtime_scatter_all
868
-
869
  show_open_only_checkbox.change(
870
- fn=update_display_open_only,
871
- inputs=[show_open_only_checkbox],
872
  outputs=[dataframe_component, cost_plot_component, runtime_plot_component]
873
  )
 
 
 
 
 
874
 
875
  legend_markdown = create_legend_markdown(category_name)
876
  gr.HTML(value=legend_markdown, elem_id="legend-markdown")
@@ -993,8 +1023,8 @@ def create_leaderboard_display(
993
  outputs=[dataframe_component, cost_plot_component, runtime_plot_component]
994
  )
995
 
996
- # Return the show_open_only_checkbox so it can be used to update other sections
997
- return show_open_only_checkbox
998
 
999
  # # --- Detailed Benchmark Display ---
1000
  def create_benchmark_details_display(
 
8
  from huggingface_hub import HfApi
9
 
10
  import aliases
11
+ from constants import MARK_BY_CHOICES, MARK_BY_DEFAULT
12
  from simple_data_loader import SimpleLeaderboardViewer
13
  from leaderboard_transformer import (
14
  DataTransformer,
 
689
  primary_runtime_col = f"{category_name} Runtime"
690
 
691
  # Function to create cost/performance scatter plot from data
692
+ def create_cost_scatter_plot(df_data, mark_by=MARK_BY_DEFAULT):
693
  return _plot_scatter_plotly(
694
  data=df_data,
695
  x=primary_cost_col if primary_cost_col in df_data.columns else None,
696
  y=primary_score_col if primary_score_col in df_data.columns else "Average Score",
697
  agent_col="SDK Version",
698
  name=category_name,
699
+ plot_type='cost',
700
+ mark_by=mark_by
701
  )
702
 
703
  # Function to create runtime/performance scatter plot from data
704
+ def create_runtime_scatter_plot(df_data, mark_by=MARK_BY_DEFAULT):
705
  return _plot_scatter_plotly(
706
  data=df_data,
707
  x=primary_runtime_col if primary_runtime_col in df_data.columns else None,
708
  y=primary_score_col if primary_score_col in df_data.columns else "Average Score",
709
  agent_col="SDK Version",
710
  name=category_name,
711
+ plot_type='runtime',
712
+ mark_by=mark_by
713
  )
714
 
715
  # Create initial cost scatter plots for all filter combinations
 
754
  num_open = len(df_display_open)
755
  num_closed = num_total - num_open
756
 
757
+ # Add toggle checkboxes and dropdown ABOVE the plot
758
+ with gr.Row():
759
+ with gr.Column(scale=3):
760
+ if has_complete_entries:
761
+ show_incomplete_checkbox = gr.Checkbox(
762
+ label=f"Show incomplete entries ({num_incomplete} entries with fewer than 5 categories)",
763
+ value=False,
764
+ elem_id="show-incomplete-toggle"
765
+ )
766
+ else:
767
+ show_incomplete_checkbox = None
768
+ gr.Markdown(f"*No entries with all 5 categories completed yet. Showing all {num_total} entries.*")
769
+
770
+ # Add checkbox for open models only (shown only when both open and closed models are present)
771
+ if num_open > 0 and num_closed > 0:
772
+ show_open_only_checkbox = gr.Checkbox(
773
+ label=f"Show only open models ({num_open} open, {num_closed} closed)",
774
+ value=False,
775
+ elem_id="show-open-only-toggle"
776
+ )
777
+ else:
778
+ show_open_only_checkbox = None
779
+
780
+ with gr.Column(scale=1):
781
+ mark_by_dropdown = gr.Dropdown(
782
+ choices=MARK_BY_CHOICES,
783
+ value=MARK_BY_DEFAULT,
784
+ label="Mark systems by",
785
+ elem_id="mark-by-dropdown"
786
+ )
787
 
788
  # Plot components - show complete entries by default if available
789
  # Cost/Performance plot
 
818
  elem_id="main-leaderboard"
819
  )
820
 
821
+ # Update function for filters - handles checkboxes and mark_by dropdown
822
+ def update_display(show_incomplete, show_open_only, mark_by):
823
+ # Determine which dataframe to show based on checkbox states
824
  if show_open_only:
825
+ df_to_show = df_display_open if show_incomplete else df_display_complete_open
826
+ view_df = df_view_open if show_incomplete else df_view_complete_open
 
 
827
  else:
828
+ df_to_show = df_display_all if show_incomplete else df_display_complete
829
+ view_df = df_view_full if show_incomplete else df_view_complete
830
+
831
+ # Regenerate plots with current mark_by setting
832
+ cost_plot = create_cost_scatter_plot(view_df, mark_by)
833
+ runtime_plot = create_runtime_scatter_plot(view_df, mark_by)
834
+ return df_to_show, cost_plot, runtime_plot
835
 
836
+ # Connect checkboxes and dropdown to the update function
837
+ filter_inputs = [show_incomplete_checkbox]
838
+ if show_open_only_checkbox is not None:
839
+ filter_inputs.append(show_open_only_checkbox)
840
+ else:
841
+ # Add a dummy value for show_open_only when checkbox doesn't exist
842
+ filter_inputs = [show_incomplete_checkbox, gr.State(value=False)]
843
+ filter_inputs.append(mark_by_dropdown)
844
+
845
+ show_incomplete_checkbox.change(
846
+ fn=update_display,
847
+ inputs=filter_inputs,
848
+ outputs=[dataframe_component, cost_plot_component, runtime_plot_component]
849
+ )
850
  if show_open_only_checkbox is not None:
 
 
 
 
 
 
851
  show_open_only_checkbox.change(
852
  fn=update_display,
853
+ inputs=filter_inputs,
 
 
 
 
 
 
 
 
 
 
 
 
854
  outputs=[dataframe_component, cost_plot_component, runtime_plot_component]
855
  )
856
+ mark_by_dropdown.change(
857
+ fn=update_display,
858
+ inputs=filter_inputs,
859
+ outputs=[dataframe_component, cost_plot_component, runtime_plot_component]
860
+ )
861
  else:
862
  dataframe_component = gr.DataFrame(
863
  headers=df_headers,
 
871
  elem_id="main-leaderboard"
872
  )
873
 
874
+ # Update function for mark_by and optional open_only checkbox
875
+ def update_display_no_complete(show_open_only, mark_by):
876
+ if show_open_only:
877
+ df_to_show = df_display_open
878
+ view_df = df_view_open
879
+ else:
880
+ df_to_show = df_display_all
881
+ view_df = df_view_full
882
+ cost_plot = create_cost_scatter_plot(view_df, mark_by)
883
+ runtime_plot = create_runtime_scatter_plot(view_df, mark_by)
884
+ return df_to_show, cost_plot, runtime_plot
885
+
886
+ filter_inputs_no_complete = []
887
+ if show_open_only_checkbox is not None:
888
+ filter_inputs_no_complete.append(show_open_only_checkbox)
889
+ else:
890
+ filter_inputs_no_complete.append(gr.State(value=False))
891
+ filter_inputs_no_complete.append(mark_by_dropdown)
892
+
893
  if show_open_only_checkbox is not None:
 
 
 
 
 
 
894
  show_open_only_checkbox.change(
895
+ fn=update_display_no_complete,
896
+ inputs=filter_inputs_no_complete,
897
  outputs=[dataframe_component, cost_plot_component, runtime_plot_component]
898
  )
899
+ mark_by_dropdown.change(
900
+ fn=update_display_no_complete,
901
+ inputs=filter_inputs_no_complete,
902
+ outputs=[dataframe_component, cost_plot_component, runtime_plot_component]
903
+ )
904
 
905
  legend_markdown = create_legend_markdown(category_name)
906
  gr.HTML(value=legend_markdown, elem_id="legend-markdown")
 
1023
  outputs=[dataframe_component, cost_plot_component, runtime_plot_component]
1024
  )
1025
 
1026
+ # Return the show_open_only_checkbox and mark_by_dropdown so they can be used to update other sections
1027
+ return show_open_only_checkbox, mark_by_dropdown
1028
 
1029
  # # --- Detailed Benchmark Display ---
1030
  def create_benchmark_details_display(