Add 'Show all labels' toggle for scatter plots

#2
Files changed (2):
  1. leaderboard_transformer.py +52 -39
  2. ui_components.py +50 -28
leaderboard_transformer.py CHANGED
@@ -970,7 +970,8 @@ def _plot_scatter_plotly(
970
  agent_col: str = 'Agent',
971
  name: Optional[str] = None,
972
  plot_type: str = 'cost', # 'cost' or 'runtime'
973
- mark_by: Optional[str] = None # 'Company', 'Openness', or 'Country'
 
974
  ) -> go.Figure:
975
  from constants import MARK_BY_DEFAULT
976
  if mark_by is None:
@@ -1303,57 +1304,69 @@ def _plot_scatter_plotly(
1303
  layer="above",
1304
  ))
1305
 
1306
- # --- Section 7: Add Model Name Labels to Frontier Points ---
1307
- if frontier_rows:
1308
- frontier_labels_data = []
1309
-
1310
- for row in frontier_rows:
 
1311
  x_val = row[x_col_to_use]
1312
  y_val = row[y_col_to_use]
1313
 
1314
- # Get the model name for the label
1315
  model_name = row.get('Language Model', '')
1316
  if isinstance(model_name, list):
1317
  model_name = model_name[0] if model_name else ''
1318
- # Clean the model name (remove path prefixes)
1319
  model_name = str(model_name).split('/')[-1]
1320
- # Truncate long names
1321
  if len(model_name) > 25:
1322
  model_name = model_name[:22] + '...'
1323
 
1324
- frontier_labels_data.append({
1325
- 'x': x_val,
1326
- 'y': y_val,
1327
- 'label': model_name
1328
- })
1329
 
1330
- # Add annotations for each frontier label
1331
- # For log scale x-axis, annotations need log10(x) coordinates (Plotly issue #2580)
1332
- for item in frontier_labels_data:
1333
- x_val = item['x']
1334
- y_val = item['y']
1335
- label = item['label']
1336
 
1337
- # Transform x to log10 for annotation positioning on log scale
1338
- if x_val > 0:
1339
- x_log = np.log10(x_val)
1340
- else:
1341
- x_log = x_min_log
 
1342
 
1343
- fig.add_annotation(
1344
- x=x_log,
1345
- y=y_val,
1346
- text=label,
1347
- showarrow=False,
1348
- yshift=25, # Move label higher above the icon
1349
- font=dict(
1350
- size=10,
1351
- color='#0D0D0F', # neutral-950
1352
- family=FONT_FAMILY_SHORT
1353
- ),
1354
- xanchor='center',
1355
- yanchor='bottom'
1356
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1357
 
1358
  # --- Section 8: Configure Layout ---
1359
  # Use the same axis ranges as calculated for domain coordinates
 
970
  agent_col: str = 'Agent',
971
  name: Optional[str] = None,
972
  plot_type: str = 'cost', # 'cost' or 'runtime'
973
+ mark_by: Optional[str] = None, # 'Company', 'Openness', or 'Country'
974
+ show_all_labels: bool = False # Show labels for all points vs only Pareto frontier
975
  ) -> go.Figure:
976
  from constants import MARK_BY_DEFAULT
977
  if mark_by is None:
 
1304
  layer="above",
1305
  ))
1306
 
1307
+ # --- Section 7: Add Model Name Labels ---
1308
+ # Show labels for all points if show_all_labels is True, otherwise just Pareto frontier
1309
+ if show_all_labels:
1310
+ # Label all data points
1311
+ labels_data = []
1312
+ for _, row in data_plot.iterrows():
1313
  x_val = row[x_col_to_use]
1314
  y_val = row[y_col_to_use]
1315
 
 
1316
  model_name = row.get('Language Model', '')
1317
  if isinstance(model_name, list):
1318
  model_name = model_name[0] if model_name else ''
 
1319
  model_name = str(model_name).split('/')[-1]
 
1320
  if len(model_name) > 25:
1321
  model_name = model_name[:22] + '...'
1322
 
1323
+ labels_data.append({'x': x_val, 'y': y_val, 'label': model_name})
1324
+ elif frontier_rows:
1325
+ # Label only Pareto frontier points
1326
+ labels_data = []
 
1327
 
1328
+ for row in frontier_rows:
1329
+ x_val = row[x_col_to_use]
1330
+ y_val = row[y_col_to_use]
 
 
 
1331
 
1332
+ model_name = row.get('Language Model', '')
1333
+ if isinstance(model_name, list):
1334
+ model_name = model_name[0] if model_name else ''
1335
+ model_name = str(model_name).split('/')[-1]
1336
+ if len(model_name) > 25:
1337
+ model_name = model_name[:22] + '...'
1338
 
1339
+ labels_data.append({'x': x_val, 'y': y_val, 'label': model_name})
1340
+ else:
1341
+ labels_data = []
1342
+
1343
+ # Add annotations for each label
1344
+ # For log scale x-axis, annotations need log10(x) coordinates (Plotly issue #2580)
1345
+ for item in labels_data:
1346
+ x_val = item['x']
1347
+ y_val = item['y']
1348
+ label = item['label']
1349
+
1350
+ # Transform x to log10 for annotation positioning on log scale
1351
+ if x_val > 0:
1352
+ x_log = np.log10(x_val)
1353
+ else:
1354
+ x_log = x_min_log
1355
+
1356
+ fig.add_annotation(
1357
+ x=x_log,
1358
+ y=y_val,
1359
+ text=label,
1360
+ showarrow=False,
1361
+ yshift=25, # Move label higher above the icon
1362
+ font=dict(
1363
+ size=10,
1364
+ color='#0D0D0F', # neutral-950
1365
+ family=FONT_FAMILY_SHORT
1366
+ ),
1367
+ xanchor='center',
1368
+ yanchor='bottom'
1369
+ )
1370
 
1371
  # --- Section 8: Configure Layout ---
1372
  # Use the same axis ranges as calculated for domain coordinates
ui_components.py CHANGED
@@ -705,7 +705,7 @@ def create_leaderboard_display(
705
  primary_runtime_col = f"{category_name} Runtime"
706
 
707
  # Function to create cost/performance scatter plot from data
708
- def create_cost_scatter_plot(df_data, mark_by=MARK_BY_DEFAULT):
709
  return _plot_scatter_plotly(
710
  data=df_data,
711
  x=primary_cost_col if primary_cost_col in df_data.columns else None,
@@ -713,11 +713,12 @@ def create_leaderboard_display(
713
  agent_col="SDK Version",
714
  name=category_name,
715
  plot_type='cost',
716
- mark_by=mark_by
 
717
  )
718
 
719
  # Function to create runtime/performance scatter plot from data
720
- def create_runtime_scatter_plot(df_data, mark_by=MARK_BY_DEFAULT):
721
  return _plot_scatter_plotly(
722
  data=df_data,
723
  x=primary_runtime_col if primary_runtime_col in df_data.columns else None,
@@ -725,7 +726,8 @@ def create_leaderboard_display(
725
  agent_col="SDK Version",
726
  name=category_name,
727
  plot_type='runtime',
728
- mark_by=mark_by
 
729
  )
730
 
731
  # Create initial cost scatter plots for all filter combinations
@@ -792,6 +794,13 @@ def create_leaderboard_display(
792
  )
793
  else:
794
  show_open_only_checkbox = None
 
 
 
 
 
 
 
795
 
796
  with gr.Column(scale=1):
797
  mark_by_dropdown = gr.Dropdown(
@@ -835,7 +844,7 @@ def create_leaderboard_display(
835
  )
836
 
837
  # Update function for filters - handles checkboxes and mark_by dropdown
838
- def update_display(show_incomplete, show_open_only, mark_by):
839
  # Determine which dataframe to show based on checkbox states
840
  if show_open_only:
841
  df_to_show = df_display_open if show_incomplete else df_display_complete_open
@@ -844,9 +853,9 @@ def create_leaderboard_display(
844
  df_to_show = df_display_all if show_incomplete else df_display_complete
845
  view_df = df_view_full if show_incomplete else df_view_complete
846
 
847
- # Regenerate plots with current mark_by setting
848
- cost_plot = create_cost_scatter_plot(view_df, mark_by)
849
- runtime_plot = create_runtime_scatter_plot(view_df, mark_by)
850
  return df_to_show, cost_plot, runtime_plot
851
 
852
  # Connect checkboxes and dropdown to the update function
@@ -857,6 +866,7 @@ def create_leaderboard_display(
857
  # Add a dummy value for show_open_only when checkbox doesn't exist
858
  filter_inputs = [show_incomplete_checkbox, gr.State(value=False)]
859
  filter_inputs.append(mark_by_dropdown)
 
860
 
861
  show_incomplete_checkbox.change(
862
  fn=update_display,
@@ -874,6 +884,11 @@ def create_leaderboard_display(
874
  inputs=filter_inputs,
875
  outputs=[dataframe_component, cost_plot_component, runtime_plot_component]
876
  )
 
 
 
 
 
877
  else:
878
  dataframe_component = gr.DataFrame(
879
  headers=df_headers,
@@ -888,15 +903,15 @@ def create_leaderboard_display(
888
  )
889
 
890
  # Update function for mark_by and optional open_only checkbox
891
- def update_display_no_complete(show_open_only, mark_by):
892
  if show_open_only:
893
  df_to_show = df_display_open
894
  view_df = df_view_open
895
  else:
896
  df_to_show = df_display_all
897
  view_df = df_view_full
898
- cost_plot = create_cost_scatter_plot(view_df, mark_by)
899
- runtime_plot = create_runtime_scatter_plot(view_df, mark_by)
900
  return df_to_show, cost_plot, runtime_plot
901
 
902
  filter_inputs_no_complete = []
@@ -905,6 +920,7 @@ def create_leaderboard_display(
905
  else:
906
  filter_inputs_no_complete.append(gr.State(value=False))
907
  filter_inputs_no_complete.append(mark_by_dropdown)
 
908
 
909
  if show_open_only_checkbox is not None:
910
  show_open_only_checkbox.change(
@@ -917,13 +933,18 @@ def create_leaderboard_display(
917
  inputs=filter_inputs_no_complete,
918
  outputs=[dataframe_component, cost_plot_component, runtime_plot_component]
919
  )
 
 
 
 
 
920
 
921
  legend_markdown = create_legend_markdown(category_name)
922
  gr.HTML(value=legend_markdown, elem_id="legend-markdown")
923
 
924
  # Add a timer to periodically check for data updates and refresh the UI
925
  # This runs every 60 seconds to check if new data is available
926
- def check_and_refresh_data(show_incomplete, show_open_only=False, mark_by=MARK_BY_DEFAULT):
927
  """Check if data has been refreshed and return updated data if so."""
928
  current_version = get_data_version()
929
  if current_version > initial_data_version:
@@ -953,16 +974,16 @@ def create_leaderboard_display(
953
  new_df_display_open = prepare_df_for_display(new_df_view_open)
954
  new_df_display_complete_open = prepare_df_for_display(new_df_view_complete_open)
955
 
956
- # Create new scatter plots for all combinations (with current mark_by)
957
- new_cost_scatter_complete = create_cost_scatter_plot(new_df_view_complete, mark_by) if len(new_df_display_complete) > 0 else go.Figure()
958
- new_cost_scatter_all = create_cost_scatter_plot(new_df_view_full, mark_by)
959
- new_cost_scatter_open = create_cost_scatter_plot(new_df_view_open, mark_by) if len(new_df_view_open) > 0 else go.Figure()
960
- new_cost_scatter_complete_open = create_cost_scatter_plot(new_df_view_complete_open, mark_by) if len(new_df_view_complete_open) > 0 else go.Figure()
961
 
962
- new_runtime_scatter_complete = create_runtime_scatter_plot(new_df_view_complete, mark_by) if len(new_df_display_complete) > 0 else go.Figure()
963
- new_runtime_scatter_all = create_runtime_scatter_plot(new_df_view_full, mark_by)
964
- new_runtime_scatter_open = create_runtime_scatter_plot(new_df_view_open, mark_by) if len(new_df_view_open) > 0 else go.Figure()
965
- new_runtime_scatter_complete_open = create_runtime_scatter_plot(new_df_view_complete_open, mark_by) if len(new_df_view_complete_open) > 0 else go.Figure()
966
 
967
  # Return the appropriate data based on checkbox states
968
  if show_open_only:
@@ -997,6 +1018,7 @@ def create_leaderboard_display(
997
  if show_open_only_checkbox is not None:
998
  timer_inputs.append(show_open_only_checkbox)
999
  timer_inputs.append(mark_by_dropdown) # Always include mark_by
 
1000
  refresh_timer.tick(
1001
  fn=check_and_refresh_data,
1002
  inputs=timer_inputs,
@@ -1004,7 +1026,7 @@ def create_leaderboard_display(
1004
  )
1005
  else:
1006
  # If no incomplete checkbox, always show all data (but still filter by open if needed)
1007
- def check_and_refresh_all(show_open_only=False, mark_by=MARK_BY_DEFAULT):
1008
  current_version = get_data_version()
1009
  if current_version > initial_data_version:
1010
  print(f"[REFRESH] Data version changed, reloading...")
@@ -1017,8 +1039,8 @@ def create_leaderboard_display(
1017
  new_df_view_full = new_df_view_full[new_df_view_full['Openness'].str.lower() == 'open'].copy()
1018
 
1019
  new_df_display_all = prepare_df_for_display(new_df_view_full)
1020
- new_cost_scatter_all = create_cost_scatter_plot(new_df_view_full, mark_by)
1021
- new_runtime_scatter_all = create_runtime_scatter_plot(new_df_view_full, mark_by)
1022
  return new_df_display_all, new_cost_scatter_all, new_runtime_scatter_all
1023
 
1024
  if show_open_only:
@@ -1028,15 +1050,15 @@ def create_leaderboard_display(
1028
  if show_open_only_checkbox is not None:
1029
  refresh_timer.tick(
1030
  fn=check_and_refresh_all,
1031
- inputs=[show_open_only_checkbox, mark_by_dropdown],
1032
  outputs=[dataframe_component, cost_plot_component, runtime_plot_component]
1033
  )
1034
  else:
1035
- def check_and_refresh_simple(mark_by=MARK_BY_DEFAULT):
1036
- return check_and_refresh_all(False, mark_by)
1037
  refresh_timer.tick(
1038
  fn=check_and_refresh_simple,
1039
- inputs=[mark_by_dropdown],
1040
  outputs=[dataframe_component, cost_plot_component, runtime_plot_component]
1041
  )
1042
 
 
705
  primary_runtime_col = f"{category_name} Runtime"
706
 
707
  # Function to create cost/performance scatter plot from data
708
+ def create_cost_scatter_plot(df_data, mark_by=MARK_BY_DEFAULT, show_all_labels=False):
709
  return _plot_scatter_plotly(
710
  data=df_data,
711
  x=primary_cost_col if primary_cost_col in df_data.columns else None,
 
713
  agent_col="SDK Version",
714
  name=category_name,
715
  plot_type='cost',
716
+ mark_by=mark_by,
717
+ show_all_labels=show_all_labels
718
  )
719
 
720
  # Function to create runtime/performance scatter plot from data
721
+ def create_runtime_scatter_plot(df_data, mark_by=MARK_BY_DEFAULT, show_all_labels=False):
722
  return _plot_scatter_plotly(
723
  data=df_data,
724
  x=primary_runtime_col if primary_runtime_col in df_data.columns else None,
 
726
  agent_col="SDK Version",
727
  name=category_name,
728
  plot_type='runtime',
729
+ mark_by=mark_by,
730
+ show_all_labels=show_all_labels
731
  )
732
 
733
  # Create initial cost scatter plots for all filter combinations
 
794
  )
795
  else:
796
  show_open_only_checkbox = None
797
+
798
+ # Add checkbox for showing all labels on scatter plot
799
+ show_all_labels_checkbox = gr.Checkbox(
800
+ label="Show all labels on scatter plots",
801
+ value=False,
802
+ elem_id="show-all-labels-toggle"
803
+ )
804
 
805
  with gr.Column(scale=1):
806
  mark_by_dropdown = gr.Dropdown(
 
844
  )
845
 
846
  # Update function for filters - handles checkboxes and mark_by dropdown
847
+ def update_display(show_incomplete, show_open_only, mark_by, show_all_labels):
848
  # Determine which dataframe to show based on checkbox states
849
  if show_open_only:
850
  df_to_show = df_display_open if show_incomplete else df_display_complete_open
 
853
  df_to_show = df_display_all if show_incomplete else df_display_complete
854
  view_df = df_view_full if show_incomplete else df_view_complete
855
 
856
+ # Regenerate plots with current mark_by and show_all_labels settings
857
+ cost_plot = create_cost_scatter_plot(view_df, mark_by, show_all_labels)
858
+ runtime_plot = create_runtime_scatter_plot(view_df, mark_by, show_all_labels)
859
  return df_to_show, cost_plot, runtime_plot
860
 
861
  # Connect checkboxes and dropdown to the update function
 
866
  # Add a dummy value for show_open_only when checkbox doesn't exist
867
  filter_inputs = [show_incomplete_checkbox, gr.State(value=False)]
868
  filter_inputs.append(mark_by_dropdown)
869
+ filter_inputs.append(show_all_labels_checkbox)
870
 
871
  show_incomplete_checkbox.change(
872
  fn=update_display,
 
884
  inputs=filter_inputs,
885
  outputs=[dataframe_component, cost_plot_component, runtime_plot_component]
886
  )
887
+ show_all_labels_checkbox.change(
888
+ fn=update_display,
889
+ inputs=filter_inputs,
890
+ outputs=[dataframe_component, cost_plot_component, runtime_plot_component]
891
+ )
892
  else:
893
  dataframe_component = gr.DataFrame(
894
  headers=df_headers,
 
903
  )
904
 
905
  # Update function for mark_by and optional open_only checkbox
906
+ def update_display_no_complete(show_open_only, mark_by, show_all_labels):
907
  if show_open_only:
908
  df_to_show = df_display_open
909
  view_df = df_view_open
910
  else:
911
  df_to_show = df_display_all
912
  view_df = df_view_full
913
+ cost_plot = create_cost_scatter_plot(view_df, mark_by, show_all_labels)
914
+ runtime_plot = create_runtime_scatter_plot(view_df, mark_by, show_all_labels)
915
  return df_to_show, cost_plot, runtime_plot
916
 
917
  filter_inputs_no_complete = []
 
920
  else:
921
  filter_inputs_no_complete.append(gr.State(value=False))
922
  filter_inputs_no_complete.append(mark_by_dropdown)
923
+ filter_inputs_no_complete.append(show_all_labels_checkbox)
924
 
925
  if show_open_only_checkbox is not None:
926
  show_open_only_checkbox.change(
 
933
  inputs=filter_inputs_no_complete,
934
  outputs=[dataframe_component, cost_plot_component, runtime_plot_component]
935
  )
936
+ show_all_labels_checkbox.change(
937
+ fn=update_display_no_complete,
938
+ inputs=filter_inputs_no_complete,
939
+ outputs=[dataframe_component, cost_plot_component, runtime_plot_component]
940
+ )
941
 
942
  legend_markdown = create_legend_markdown(category_name)
943
  gr.HTML(value=legend_markdown, elem_id="legend-markdown")
944
 
945
  # Add a timer to periodically check for data updates and refresh the UI
946
  # This runs every 60 seconds to check if new data is available
947
+ def check_and_refresh_data(show_incomplete, show_open_only=False, mark_by=MARK_BY_DEFAULT, show_all_labels=False):
948
  """Check if data has been refreshed and return updated data if so."""
949
  current_version = get_data_version()
950
  if current_version > initial_data_version:
 
974
  new_df_display_open = prepare_df_for_display(new_df_view_open)
975
  new_df_display_complete_open = prepare_df_for_display(new_df_view_complete_open)
976
 
977
+ # Create new scatter plots for all combinations (with current mark_by and show_all_labels)
978
+ new_cost_scatter_complete = create_cost_scatter_plot(new_df_view_complete, mark_by, show_all_labels) if len(new_df_display_complete) > 0 else go.Figure()
979
+ new_cost_scatter_all = create_cost_scatter_plot(new_df_view_full, mark_by, show_all_labels)
980
+ new_cost_scatter_open = create_cost_scatter_plot(new_df_view_open, mark_by, show_all_labels) if len(new_df_view_open) > 0 else go.Figure()
981
+ new_cost_scatter_complete_open = create_cost_scatter_plot(new_df_view_complete_open, mark_by, show_all_labels) if len(new_df_view_complete_open) > 0 else go.Figure()
982
 
983
+ new_runtime_scatter_complete = create_runtime_scatter_plot(new_df_view_complete, mark_by, show_all_labels) if len(new_df_display_complete) > 0 else go.Figure()
984
+ new_runtime_scatter_all = create_runtime_scatter_plot(new_df_view_full, mark_by, show_all_labels)
985
+ new_runtime_scatter_open = create_runtime_scatter_plot(new_df_view_open, mark_by, show_all_labels) if len(new_df_view_open) > 0 else go.Figure()
986
+ new_runtime_scatter_complete_open = create_runtime_scatter_plot(new_df_view_complete_open, mark_by, show_all_labels) if len(new_df_view_complete_open) > 0 else go.Figure()
987
 
988
  # Return the appropriate data based on checkbox states
989
  if show_open_only:
 
1018
  if show_open_only_checkbox is not None:
1019
  timer_inputs.append(show_open_only_checkbox)
1020
  timer_inputs.append(mark_by_dropdown) # Always include mark_by
1021
+ timer_inputs.append(show_all_labels_checkbox)
1022
  refresh_timer.tick(
1023
  fn=check_and_refresh_data,
1024
  inputs=timer_inputs,
 
1026
  )
1027
  else:
1028
  # If no incomplete checkbox, always show all data (but still filter by open if needed)
1029
+ def check_and_refresh_all(show_open_only=False, mark_by=MARK_BY_DEFAULT, show_all_labels=False):
1030
  current_version = get_data_version()
1031
  if current_version > initial_data_version:
1032
  print(f"[REFRESH] Data version changed, reloading...")
 
1039
  new_df_view_full = new_df_view_full[new_df_view_full['Openness'].str.lower() == 'open'].copy()
1040
 
1041
  new_df_display_all = prepare_df_for_display(new_df_view_full)
1042
+ new_cost_scatter_all = create_cost_scatter_plot(new_df_view_full, mark_by, show_all_labels)
1043
+ new_runtime_scatter_all = create_runtime_scatter_plot(new_df_view_full, mark_by, show_all_labels)
1044
  return new_df_display_all, new_cost_scatter_all, new_runtime_scatter_all
1045
 
1046
  if show_open_only:
 
1050
  if show_open_only_checkbox is not None:
1051
  refresh_timer.tick(
1052
  fn=check_and_refresh_all,
1053
+ inputs=[show_open_only_checkbox, mark_by_dropdown, show_all_labels_checkbox],
1054
  outputs=[dataframe_component, cost_plot_component, runtime_plot_component]
1055
  )
1056
  else:
1057
+ def check_and_refresh_simple(mark_by=MARK_BY_DEFAULT, show_all_labels=False):
1058
+ return check_and_refresh_all(False, mark_by, show_all_labels)
1059
  refresh_timer.tick(
1060
  fn=check_and_refresh_simple,
1061
+ inputs=[mark_by_dropdown, show_all_labels_checkbox],
1062
  outputs=[dataframe_component, cost_plot_component, runtime_plot_component]
1063
  )
1064