openhands openhands committed on
Commit
5cdf97c
·
1 Parent(s): 9040dc0

Add 'Show only open models' checkbox filter

Browse files

- Added checkbox to filter leaderboard table and scatter plots to show only open-source/open-weight models
- Checkbox shows count of open and closed models (e.g., '4 open, 5 closed')
- Filter works independently and in combination with 'Show incomplete entries' checkbox
- Filter applies to both Cost/Performance and Runtime/Performance scatter plots
- Updated timer/refresh logic to maintain filter state on data refresh

Co-authored-by: openhands <openhands@all-hands.dev>

Files changed (1) hide show
  1. ui_components.py +146 -32
ui_components.py CHANGED
@@ -582,8 +582,18 @@ def create_leaderboard_display(
582
  else:
583
  df_view_complete = df_view_full.copy()
584
 
 
 
 
 
 
 
 
 
585
  df_display_complete = prepare_df_for_display(df_view_complete)
586
  df_display_all = prepare_df_for_display(df_view_full)
 
 
587
 
588
  # If no complete entries exist, show all entries by default
589
  has_complete_entries = len(df_display_complete) > 0
@@ -620,13 +630,17 @@ def create_leaderboard_display(
620
  plot_type='runtime'
621
  )
622
 
623
- # Create initial cost scatter plots for both complete and all data
624
  cost_scatter_complete = create_cost_scatter_plot(df_view_complete) if has_complete_entries else go.Figure()
625
  cost_scatter_all = create_cost_scatter_plot(df_view_full)
 
 
626
 
627
- # Create initial runtime scatter plots for both complete and all data
628
  runtime_scatter_complete = create_runtime_scatter_plot(df_view_complete) if has_complete_entries else go.Figure()
629
  runtime_scatter_all = create_runtime_scatter_plot(df_view_full)
 
 
630
 
631
  # Now get headers from the renamed dataframe (use all entries to ensure headers are present)
632
  df_headers = df_display_all.columns.tolist()
@@ -651,12 +665,14 @@ def create_leaderboard_display(
651
  # 5. Combine all the lists to create the final, fully dynamic list.
652
  final_column_widths = fixed_start_widths + dynamic_widths + fixed_end_widths
653
 
654
- # Calculate counts for the checkbox label
655
  num_complete = len(df_display_complete)
656
  num_total = len(df_display_all)
657
  num_incomplete = num_total - num_complete
 
 
658
 
659
- # Add toggle for showing incomplete entries ABOVE the plot
660
  if has_complete_entries:
661
  show_incomplete_checkbox = gr.Checkbox(
662
  label=f"Show incomplete entries ({num_incomplete} entries with fewer than 5 categories)",
@@ -666,6 +682,16 @@ def create_leaderboard_display(
666
  else:
667
  show_incomplete_checkbox = None
668
  gr.Markdown(f"*No entries with all 5 categories completed yet. Showing all {num_total} entries.*")
 
 
 
 
 
 
 
 
 
 
669
 
670
  # Plot components - show complete entries by default if available
671
  # Cost/Performance plot
@@ -700,18 +726,46 @@ def create_leaderboard_display(
700
  elem_id="main-leaderboard"
701
  )
702
 
703
- # Update function for the toggle - updates both table and plots
704
- def update_display(show_incomplete):
705
- if show_incomplete:
706
- return df_display_all, cost_scatter_all, runtime_scatter_all
 
 
 
 
707
  else:
708
- return df_display_complete, cost_scatter_complete, runtime_scatter_complete
 
 
 
709
 
710
- show_incomplete_checkbox.change(
711
- fn=update_display,
712
- inputs=[show_incomplete_checkbox],
713
- outputs=[dataframe_component, cost_plot_component, runtime_plot_component]
714
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
715
  else:
716
  dataframe_component = gr.DataFrame(
717
  headers=df_headers,
@@ -724,13 +778,27 @@ def create_leaderboard_display(
724
  show_search="search",
725
  elem_id="main-leaderboard"
726
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
727
 
728
  legend_markdown = create_legend_markdown(category_name)
729
  gr.HTML(value=legend_markdown, elem_id="legend-markdown")
730
 
731
  # Add a timer to periodically check for data updates and refresh the UI
732
  # This runs every 60 seconds to check if new data is available
733
- def check_and_refresh_data(current_checkbox_state):
734
  """Check if data has been refreshed and return updated data if so."""
735
  current_version = get_data_version()
736
  if current_version > initial_data_version:
@@ -747,40 +815,70 @@ def create_leaderboard_display(
747
  else:
748
  new_df_view_complete = new_df_view_full.copy()
749
 
 
 
 
 
 
 
 
 
750
  new_df_display_complete = prepare_df_for_display(new_df_view_complete)
751
  new_df_display_all = prepare_df_for_display(new_df_view_full)
 
 
752
 
753
- # Create new scatter plots (both cost and runtime)
754
  new_cost_scatter_complete = create_cost_scatter_plot(new_df_view_complete) if len(new_df_display_complete) > 0 else go.Figure()
755
  new_cost_scatter_all = create_cost_scatter_plot(new_df_view_full)
 
 
 
756
  new_runtime_scatter_complete = create_runtime_scatter_plot(new_df_view_complete) if len(new_df_display_complete) > 0 else go.Figure()
757
  new_runtime_scatter_all = create_runtime_scatter_plot(new_df_view_full)
 
 
758
 
759
- # Return the appropriate data based on checkbox state
760
- if current_checkbox_state:
761
- return new_df_display_all, new_cost_scatter_all, new_runtime_scatter_all
 
 
 
762
  else:
763
- return new_df_display_complete, new_cost_scatter_complete, new_runtime_scatter_complete
 
 
 
764
 
765
- # No change, return current values
766
- if current_checkbox_state:
767
- return df_display_all, cost_scatter_all, runtime_scatter_all
 
 
 
768
  else:
769
- return df_display_complete, cost_scatter_complete, runtime_scatter_complete
 
 
 
770
 
771
  # Create a timer that checks for updates every 60 seconds
772
  refresh_timer = gr.Timer(value=60)
773
 
774
  # Connect the timer to the refresh function
775
  if show_incomplete_checkbox is not None:
 
 
 
776
  refresh_timer.tick(
777
  fn=check_and_refresh_data,
778
- inputs=[show_incomplete_checkbox],
779
  outputs=[dataframe_component, cost_plot_component, runtime_plot_component]
780
  )
781
  else:
782
- # If no checkbox, always show all data
783
- def check_and_refresh_all():
784
  current_version = get_data_version()
785
  if current_version > initial_data_version:
786
  print(f"[REFRESH] Data version changed, reloading...")
@@ -788,17 +886,33 @@ def create_leaderboard_display(
788
  if not new_df.empty:
789
  new_transformer = DataTransformer(new_df, new_tag_map)
790
  new_df_view_full, _ = new_transformer.view(tag=category_name, use_plotly=True)
 
 
 
 
791
  new_df_display_all = prepare_df_for_display(new_df_view_full)
792
  new_cost_scatter_all = create_cost_scatter_plot(new_df_view_full)
793
  new_runtime_scatter_all = create_runtime_scatter_plot(new_df_view_full)
794
  return new_df_display_all, new_cost_scatter_all, new_runtime_scatter_all
 
 
 
795
  return df_display_all, cost_scatter_all, runtime_scatter_all
796
 
797
- refresh_timer.tick(
798
- fn=check_and_refresh_all,
799
- inputs=[],
800
- outputs=[dataframe_component, cost_plot_component, runtime_plot_component]
801
- )
 
 
 
 
 
 
 
 
 
802
 
803
  # Return the components so they can be referenced elsewhere.
804
  return cost_plot_component, runtime_plot_component, dataframe_component
 
582
  else:
583
  df_view_complete = df_view_full.copy()
584
 
585
+ # Prepare open-only filtered versions (filter before prepare_df_for_display drops Openness column)
586
+ if 'Openness' in df_view_full.columns:
587
+ df_view_open = df_view_full[df_view_full['Openness'].str.lower() == 'open'].copy()
588
+ df_view_complete_open = df_view_complete[df_view_complete['Openness'].str.lower() == 'open'].copy()
589
+ else:
590
+ df_view_open = df_view_full.copy()
591
+ df_view_complete_open = df_view_complete.copy()
592
+
593
  df_display_complete = prepare_df_for_display(df_view_complete)
594
  df_display_all = prepare_df_for_display(df_view_full)
595
+ df_display_open = prepare_df_for_display(df_view_open)
596
+ df_display_complete_open = prepare_df_for_display(df_view_complete_open)
597
 
598
  # If no complete entries exist, show all entries by default
599
  has_complete_entries = len(df_display_complete) > 0
 
630
  plot_type='runtime'
631
  )
632
 
633
+ # Create initial cost scatter plots for all filter combinations
634
  cost_scatter_complete = create_cost_scatter_plot(df_view_complete) if has_complete_entries else go.Figure()
635
  cost_scatter_all = create_cost_scatter_plot(df_view_full)
636
+ cost_scatter_open = create_cost_scatter_plot(df_view_open) if len(df_view_open) > 0 else go.Figure()
637
+ cost_scatter_complete_open = create_cost_scatter_plot(df_view_complete_open) if len(df_view_complete_open) > 0 else go.Figure()
638
 
639
+ # Create initial runtime scatter plots for all filter combinations
640
  runtime_scatter_complete = create_runtime_scatter_plot(df_view_complete) if has_complete_entries else go.Figure()
641
  runtime_scatter_all = create_runtime_scatter_plot(df_view_full)
642
+ runtime_scatter_open = create_runtime_scatter_plot(df_view_open) if len(df_view_open) > 0 else go.Figure()
643
+ runtime_scatter_complete_open = create_runtime_scatter_plot(df_view_complete_open) if len(df_view_complete_open) > 0 else go.Figure()
644
 
645
  # Now get headers from the renamed dataframe (use all entries to ensure headers are present)
646
  df_headers = df_display_all.columns.tolist()
 
665
  # 5. Combine all the lists to create the final, fully dynamic list.
666
  final_column_widths = fixed_start_widths + dynamic_widths + fixed_end_widths
667
 
668
+ # Calculate counts for the checkbox labels
669
  num_complete = len(df_display_complete)
670
  num_total = len(df_display_all)
671
  num_incomplete = num_total - num_complete
672
+ num_open = len(df_display_open)
673
+ num_closed = num_total - num_open
674
 
675
+ # Add toggle checkboxes ABOVE the plot
676
  if has_complete_entries:
677
  show_incomplete_checkbox = gr.Checkbox(
678
  label=f"Show incomplete entries ({num_incomplete} entries with fewer than 5 categories)",
 
682
  else:
683
  show_incomplete_checkbox = None
684
  gr.Markdown(f"*No entries with all 5 categories completed yet. Showing all {num_total} entries.*")
685
+
686
+ # Add checkbox for open models only (shown only when both open and closed models exist)
687
+ if num_open > 0 and num_closed > 0:
688
+ show_open_only_checkbox = gr.Checkbox(
689
+ label=f"Show only open models ({num_open} open, {num_closed} closed)",
690
+ value=False,
691
+ elem_id="show-open-only-toggle"
692
+ )
693
+ else:
694
+ show_open_only_checkbox = None
695
 
696
  # Plot components - show complete entries by default if available
697
  # Cost/Performance plot
 
726
  elem_id="main-leaderboard"
727
  )
728
 
729
+ # Update function for filters - handles both checkboxes
730
+ def update_display(show_incomplete, show_open_only):
731
+ # Determine which data to show based on checkbox states
732
+ if show_open_only:
733
+ if show_incomplete:
734
+ return df_display_open, cost_scatter_open, runtime_scatter_open
735
+ else:
736
+ return df_display_complete_open, cost_scatter_complete_open, runtime_scatter_complete_open
737
  else:
738
+ if show_incomplete:
739
+ return df_display_all, cost_scatter_all, runtime_scatter_all
740
+ else:
741
+ return df_display_complete, cost_scatter_complete, runtime_scatter_complete
742
 
743
+ # Connect both checkboxes to the update function
744
+ checkbox_inputs = [show_incomplete_checkbox]
745
+ if show_open_only_checkbox is not None:
746
+ checkbox_inputs.append(show_open_only_checkbox)
747
+ show_incomplete_checkbox.change(
748
+ fn=update_display,
749
+ inputs=checkbox_inputs,
750
+ outputs=[dataframe_component, cost_plot_component, runtime_plot_component]
751
+ )
752
+ show_open_only_checkbox.change(
753
+ fn=update_display,
754
+ inputs=checkbox_inputs,
755
+ outputs=[dataframe_component, cost_plot_component, runtime_plot_component]
756
+ )
757
+ else:
758
+ # No open-only checkbox; use the simplified single-input version
759
+ def update_display_simple(show_incomplete):
760
+ if show_incomplete:
761
+ return df_display_all, cost_scatter_all, runtime_scatter_all
762
+ else:
763
+ return df_display_complete, cost_scatter_complete, runtime_scatter_complete
764
+ show_incomplete_checkbox.change(
765
+ fn=update_display_simple,
766
+ inputs=[show_incomplete_checkbox],
767
+ outputs=[dataframe_component, cost_plot_component, runtime_plot_component]
768
+ )
769
  else:
770
  dataframe_component = gr.DataFrame(
771
  headers=df_headers,
 
778
  show_search="search",
779
  elem_id="main-leaderboard"
780
  )
781
+
782
+ # Even without incomplete checkbox, we may have open_only checkbox
783
+ if show_open_only_checkbox is not None:
784
+ def update_display_open_only(show_open_only):
785
+ if show_open_only:
786
+ return df_display_open, cost_scatter_open, runtime_scatter_open
787
+ else:
788
+ return df_display_all, cost_scatter_all, runtime_scatter_all
789
+
790
+ show_open_only_checkbox.change(
791
+ fn=update_display_open_only,
792
+ inputs=[show_open_only_checkbox],
793
+ outputs=[dataframe_component, cost_plot_component, runtime_plot_component]
794
+ )
795
 
796
  legend_markdown = create_legend_markdown(category_name)
797
  gr.HTML(value=legend_markdown, elem_id="legend-markdown")
798
 
799
  # Add a timer to periodically check for data updates and refresh the UI
800
  # This runs every 60 seconds to check if new data is available
801
+ def check_and_refresh_data(show_incomplete, show_open_only=False):
802
  """Check if data has been refreshed and return updated data if so."""
803
  current_version = get_data_version()
804
  if current_version > initial_data_version:
 
815
  else:
816
  new_df_view_complete = new_df_view_full.copy()
817
 
818
+ # Prepare open-only versions
819
+ if 'Openness' in new_df_view_full.columns:
820
+ new_df_view_open = new_df_view_full[new_df_view_full['Openness'].str.lower() == 'open'].copy()
821
+ new_df_view_complete_open = new_df_view_complete[new_df_view_complete['Openness'].str.lower() == 'open'].copy()
822
+ else:
823
+ new_df_view_open = new_df_view_full.copy()
824
+ new_df_view_complete_open = new_df_view_complete.copy()
825
+
826
  new_df_display_complete = prepare_df_for_display(new_df_view_complete)
827
  new_df_display_all = prepare_df_for_display(new_df_view_full)
828
+ new_df_display_open = prepare_df_for_display(new_df_view_open)
829
+ new_df_display_complete_open = prepare_df_for_display(new_df_view_complete_open)
830
 
831
+ # Create new scatter plots for all combinations
832
  new_cost_scatter_complete = create_cost_scatter_plot(new_df_view_complete) if len(new_df_display_complete) > 0 else go.Figure()
833
  new_cost_scatter_all = create_cost_scatter_plot(new_df_view_full)
834
+ new_cost_scatter_open = create_cost_scatter_plot(new_df_view_open) if len(new_df_view_open) > 0 else go.Figure()
835
+ new_cost_scatter_complete_open = create_cost_scatter_plot(new_df_view_complete_open) if len(new_df_view_complete_open) > 0 else go.Figure()
836
+
837
  new_runtime_scatter_complete = create_runtime_scatter_plot(new_df_view_complete) if len(new_df_display_complete) > 0 else go.Figure()
838
  new_runtime_scatter_all = create_runtime_scatter_plot(new_df_view_full)
839
+ new_runtime_scatter_open = create_runtime_scatter_plot(new_df_view_open) if len(new_df_view_open) > 0 else go.Figure()
840
+ new_runtime_scatter_complete_open = create_runtime_scatter_plot(new_df_view_complete_open) if len(new_df_view_complete_open) > 0 else go.Figure()
841
 
842
+ # Return the appropriate data based on checkbox states
843
+ if show_open_only:
844
+ if show_incomplete:
845
+ return new_df_display_open, new_cost_scatter_open, new_runtime_scatter_open
846
+ else:
847
+ return new_df_display_complete_open, new_cost_scatter_complete_open, new_runtime_scatter_complete_open
848
  else:
849
+ if show_incomplete:
850
+ return new_df_display_all, new_cost_scatter_all, new_runtime_scatter_all
851
+ else:
852
+ return new_df_display_complete, new_cost_scatter_complete, new_runtime_scatter_complete
853
 
854
+ # No change, return current values based on checkbox states
855
+ if show_open_only:
856
+ if show_incomplete:
857
+ return df_display_open, cost_scatter_open, runtime_scatter_open
858
+ else:
859
+ return df_display_complete_open, cost_scatter_complete_open, runtime_scatter_complete_open
860
  else:
861
+ if show_incomplete:
862
+ return df_display_all, cost_scatter_all, runtime_scatter_all
863
+ else:
864
+ return df_display_complete, cost_scatter_complete, runtime_scatter_complete
865
 
866
  # Create a timer that checks for updates every 60 seconds
867
  refresh_timer = gr.Timer(value=60)
868
 
869
  # Connect the timer to the refresh function
870
  if show_incomplete_checkbox is not None:
871
+ timer_inputs = [show_incomplete_checkbox]
872
+ if show_open_only_checkbox is not None:
873
+ timer_inputs.append(show_open_only_checkbox)
874
  refresh_timer.tick(
875
  fn=check_and_refresh_data,
876
+ inputs=timer_inputs,
877
  outputs=[dataframe_component, cost_plot_component, runtime_plot_component]
878
  )
879
  else:
880
+ # If no incomplete checkbox, always show all data (but still filter by open if needed)
881
+ def check_and_refresh_all(show_open_only=False):
882
  current_version = get_data_version()
883
  if current_version > initial_data_version:
884
  print(f"[REFRESH] Data version changed, reloading...")
 
886
  if not new_df.empty:
887
  new_transformer = DataTransformer(new_df, new_tag_map)
888
  new_df_view_full, _ = new_transformer.view(tag=category_name, use_plotly=True)
889
+
890
+ if show_open_only and 'Openness' in new_df_view_full.columns:
891
+ new_df_view_full = new_df_view_full[new_df_view_full['Openness'].str.lower() == 'open'].copy()
892
+
893
  new_df_display_all = prepare_df_for_display(new_df_view_full)
894
  new_cost_scatter_all = create_cost_scatter_plot(new_df_view_full)
895
  new_runtime_scatter_all = create_runtime_scatter_plot(new_df_view_full)
896
  return new_df_display_all, new_cost_scatter_all, new_runtime_scatter_all
897
+
898
+ if show_open_only:
899
+ return df_display_open, cost_scatter_open, runtime_scatter_open
900
  return df_display_all, cost_scatter_all, runtime_scatter_all
901
 
902
+ if show_open_only_checkbox is not None:
903
+ refresh_timer.tick(
904
+ fn=check_and_refresh_all,
905
+ inputs=[show_open_only_checkbox],
906
+ outputs=[dataframe_component, cost_plot_component, runtime_plot_component]
907
+ )
908
+ else:
909
+ def check_and_refresh_simple():
910
+ return check_and_refresh_all(False)
911
+ refresh_timer.tick(
912
+ fn=check_and_refresh_simple,
913
+ inputs=[],
914
+ outputs=[dataframe_component, cost_plot_component, runtime_plot_component]
915
+ )
916
 
917
  # Return the components so they can be referenced elsewhere.
918
  return cost_plot_component, runtime_plot_component, dataframe_component