openhands committed on
Commit
0b126a8
·
1 Parent(s): 41e5220

Hide incomplete entries from home page charts by default

Browse files
Files changed (2) hide show
  1. main_page.py +81 -41
  2. ui_components.py +2 -2
main_page.py CHANGED
@@ -1,6 +1,7 @@
1
  import matplotlib
2
  matplotlib.use('Agg')
3
  import gradio as gr
 
4
 
5
 
6
  from ui_components import (
@@ -26,6 +27,32 @@ from constants import MARK_BY_DEFAULT
26
  CACHED_VIEWERS = {}
27
  CACHED_TAG_MAPS = {}
28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  def build_page():
30
  with gr.Row(elem_id="intro-row"):
31
  with gr.Column(scale=1):
@@ -38,78 +65,91 @@ def build_page():
38
 
39
  test_df, test_tag_map = get_full_leaderboard_data("test")
40
  if not test_df.empty:
41
- # Get the checkbox and dropdown returned from create_leaderboard_display
42
- show_open_only_checkbox, mark_by_dropdown = create_leaderboard_display(
43
  full_df=test_df,
44
  tag_map=test_tag_map,
45
  category_name=CATEGORY_NAME,
46
  split_name="test"
47
  )
48
-
49
- # Prepare open-only filtered dataframe for Winners and Evolution
 
 
50
  if 'Openness' in test_df.columns:
51
  test_df_open = test_df[test_df['Openness'].str.lower() == 'open'].copy()
52
  else:
53
  test_df_open = test_df.copy()
54
-
 
 
 
55
  # --- Winners by Category Section ---
56
  gr.Markdown("---")
57
  gr.HTML('<h2>Winners by Category</h2>', elem_id="winners-header")
58
  gr.Markdown("Top 5 performing systems in each benchmark category.")
59
-
60
- # Create both all and open-only versions of winners HTML
61
- winners_html_all = create_winners_by_category_html(test_df, top_n=5)
62
- winners_html_open = create_winners_by_category_html(test_df_open, top_n=5)
63
-
64
- winners_component = gr.HTML(winners_html_all, elem_id="winners-by-category")
65
-
66
  # --- New Visualization Sections ---
67
  gr.Markdown("---")
68
-
69
  # Evolution Over Time Section
70
  gr.HTML('<h2>Evolution Over Time</h2>', elem_id="evolution-header")
71
  gr.Markdown("Track how model performance has improved over time based on release dates.")
72
-
73
- # Create initial evolution chart with default mark_by
74
- evolution_fig_all = create_evolution_over_time_chart(test_df, MARK_BY_DEFAULT)
75
-
76
- evolution_component = gr.Plot(value=evolution_fig_all, elem_id="evolution-chart")
77
-
78
  gr.Markdown("---")
79
-
80
  # Open Model Accuracy by Size Section (always shows open models only by design)
81
  gr.HTML('<h2>Open Model Accuracy by Size</h2>', elem_id="size-accuracy-header")
82
  gr.Markdown("Compare open-weights model performance against their parameter count.")
83
-
84
- size_fig = create_accuracy_by_size_chart(test_df, MARK_BY_DEFAULT)
85
- size_component = gr.Plot(value=size_fig, elem_id="size-accuracy-chart")
86
-
87
- # Update function for Winners, Evolution, and Size charts based on filters
88
- def update_extra_sections(show_open_only, mark_by):
89
- # Select the appropriate dataframe based on open_only filter
90
- df_to_use = test_df_open if show_open_only else test_df
91
-
92
- # Winners HTML (not affected by mark_by, only open_only)
93
- winners_html = winners_html_open if show_open_only else winners_html_all
94
-
95
- # Regenerate charts with current mark_by setting
96
- evolution_fig = create_evolution_over_time_chart(df_to_use, mark_by)
97
- size_fig = create_accuracy_by_size_chart(test_df, mark_by) # Size chart always uses full df (filters internally)
98
-
99
  return winners_html, evolution_fig, size_fig
100
-
101
- # Connect both checkbox and dropdown to update all extra sections
 
 
 
 
 
 
 
 
 
 
102
  if show_open_only_checkbox is not None:
103
  show_open_only_checkbox.change(
104
  fn=update_extra_sections,
105
- inputs=[show_open_only_checkbox, mark_by_dropdown],
106
  outputs=[winners_component, evolution_component, size_component]
107
  )
108
-
109
  if mark_by_dropdown is not None:
110
  mark_by_dropdown.change(
111
  fn=update_extra_sections,
112
- inputs=[show_open_only_checkbox if show_open_only_checkbox else gr.State(value=False), mark_by_dropdown],
113
  outputs=[winners_component, evolution_component, size_component]
114
  )
115
 
 
1
  import matplotlib
2
  matplotlib.use('Agg')
3
  import gradio as gr
4
+ import pandas as pd
5
 
6
 
7
  from ui_components import (
 
27
  CACHED_VIEWERS = {}
28
  CACHED_TAG_MAPS = {}
29
 
30
+
31
def filter_complete_entries(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` restricted to entries that cover every category.

    Completeness is decided by the first strategy whose columns are present:

    1. If all per-category score columns exist, keep rows in which none of
       those scores is missing.
    2. Otherwise, if ``'Categories Completed'`` exists, keep rows whose value
       (coerced to a number; unparseable values become NaN and are dropped)
       is at least the number of categories.
    3. Otherwise, if ``'Categories Attempted'`` exists, keep rows whose value
       equals the string ``'<n>/<n>'`` where ``n`` is the number of categories.

    If none of these columns are available, or ``df`` is empty, an unfiltered
    copy is returned.

    Args:
        df: Leaderboard data, one row per entry.

    Returns:
        A new DataFrame (always a copy, never ``df`` itself).
    """
    category_score_columns = [
        'Issue Resolution Score',
        'Frontend Score',
        'Greenfield Score',
        'Testing Score',
        'Information Gathering Score',
    ]
    # Derive the threshold from the column list so the numeric and "n/n"
    # fallbacks cannot drift out of sync with the set of categories.
    total_categories = len(category_score_columns)

    if df.empty:
        return df.copy()

    if all(column in df.columns for column in category_score_columns):
        has_every_score = df[category_score_columns].notna().all(axis=1)
        return df[has_every_score].copy()

    if 'Categories Completed' in df.columns:
        # errors='coerce' turns non-numeric values into NaN, which fails the
        # comparison below and is therefore treated as incomplete.
        categories_completed = pd.to_numeric(df['Categories Completed'], errors='coerce')
        return df[categories_completed >= total_categories].copy()

    if 'Categories Attempted' in df.columns:
        all_attempted = f'{total_categories}/{total_categories}'
        return df[df['Categories Attempted'] == all_attempted].copy()

    return df.copy()
54
+
55
+
56
  def build_page():
57
  with gr.Row(elem_id="intro-row"):
58
  with gr.Column(scale=1):
 
65
 
66
  test_df, test_tag_map = get_full_leaderboard_data("test")
67
  if not test_df.empty:
68
+ show_incomplete_checkbox, show_open_only_checkbox, mark_by_dropdown = create_leaderboard_display(
 
69
  full_df=test_df,
70
  tag_map=test_tag_map,
71
  category_name=CATEGORY_NAME,
72
  split_name="test"
73
  )
74
+
75
+ test_df_complete = filter_complete_entries(test_df)
76
+ has_complete_entries = len(test_df_complete) > 0
77
+
78
  if 'Openness' in test_df.columns:
79
  test_df_open = test_df[test_df['Openness'].str.lower() == 'open'].copy()
80
  else:
81
  test_df_open = test_df.copy()
82
+ test_df_complete_open = filter_complete_entries(test_df_open)
83
+
84
+ initial_df = test_df_complete if has_complete_entries else test_df
85
+
86
  # --- Winners by Category Section ---
87
  gr.Markdown("---")
88
  gr.HTML('<h2>Winners by Category</h2>', elem_id="winners-header")
89
  gr.Markdown("Top 5 performing systems in each benchmark category.")
90
+
91
+ winners_component = gr.HTML(
92
+ create_winners_by_category_html(initial_df, top_n=5),
93
+ elem_id="winners-by-category",
94
+ )
95
+
 
96
  # --- New Visualization Sections ---
97
  gr.Markdown("---")
98
+
99
  # Evolution Over Time Section
100
  gr.HTML('<h2>Evolution Over Time</h2>', elem_id="evolution-header")
101
  gr.Markdown("Track how model performance has improved over time based on release dates.")
102
+
103
+ evolution_component = gr.Plot(
104
+ value=create_evolution_over_time_chart(initial_df, MARK_BY_DEFAULT),
105
+ elem_id="evolution-chart",
106
+ )
107
+
108
  gr.Markdown("---")
109
+
110
  # Open Model Accuracy by Size Section (always shows open models only by design)
111
  gr.HTML('<h2>Open Model Accuracy by Size</h2>', elem_id="size-accuracy-header")
112
  gr.Markdown("Compare open-weights model performance against their parameter count.")
113
+
114
+ size_component = gr.Plot(
115
+ value=create_accuracy_by_size_chart(initial_df, MARK_BY_DEFAULT),
116
+ elem_id="size-accuracy-chart",
117
+ )
118
+
119
+ def update_extra_sections(show_incomplete, show_open_only, mark_by):
120
+ include_incomplete = show_incomplete or not has_complete_entries
121
+ base_df = test_df if include_incomplete else test_df_complete
122
+ base_df_open = test_df_open if include_incomplete else test_df_complete_open
123
+ winners_df = base_df_open if show_open_only else base_df
124
+
125
+ winners_html = create_winners_by_category_html(winners_df, top_n=5)
126
+ evolution_fig = create_evolution_over_time_chart(winners_df, mark_by)
127
+ size_fig = create_accuracy_by_size_chart(base_df, mark_by)
128
+
129
  return winners_html, evolution_fig, size_fig
130
+
131
+ show_incomplete_input = show_incomplete_checkbox if show_incomplete_checkbox is not None else gr.State(value=True)
132
+ show_open_only_input = show_open_only_checkbox if show_open_only_checkbox is not None else gr.State(value=False)
133
+ extra_section_inputs = [show_incomplete_input, show_open_only_input, mark_by_dropdown]
134
+
135
+ if show_incomplete_checkbox is not None:
136
+ show_incomplete_checkbox.change(
137
+ fn=update_extra_sections,
138
+ inputs=extra_section_inputs,
139
+ outputs=[winners_component, evolution_component, size_component]
140
+ )
141
+
142
  if show_open_only_checkbox is not None:
143
  show_open_only_checkbox.change(
144
  fn=update_extra_sections,
145
+ inputs=extra_section_inputs,
146
  outputs=[winners_component, evolution_component, size_component]
147
  )
148
+
149
  if mark_by_dropdown is not None:
150
  mark_by_dropdown.change(
151
  fn=update_extra_sections,
152
+ inputs=extra_section_inputs,
153
  outputs=[winners_component, evolution_component, size_component]
154
  )
155
 
ui_components.py CHANGED
@@ -1032,8 +1032,8 @@ def create_leaderboard_display(
1032
  outputs=[dataframe_component, cost_plot_component, runtime_plot_component]
1033
  )
1034
 
1035
- # Return the show_open_only_checkbox and mark_by_dropdown so they can be used to update other sections
1036
- return show_open_only_checkbox, mark_by_dropdown
1037
 
1038
  # # --- Detailed Benchmark Display ---
1039
  def create_benchmark_details_display(
 
1032
  outputs=[dataframe_component, cost_plot_component, runtime_plot_component]
1033
  )
1034
 
1035
+ # Return the filter controls so they can be used to update other sections
1036
+ return show_incomplete_checkbox, show_open_only_checkbox, mark_by_dropdown
1037
 
1038
  # # --- Detailed Benchmark Display ---
1039
  def create_benchmark_details_display(