openhands committed
Commit a4b9436 · 1 Parent(s): e6be394

Add Evolution Over Time and Open Model Accuracy by Size visualizations


- Add visualizations.py with two new chart functions:
  - create_evolution_over_time_chart: line chart showing model performance evolution over release dates
  - create_accuracy_by_size_chart: scatter plot showing accuracy vs. parameter count for open-weights models
- Update simple_data_loader.py to load the new metadata fields (release_date, parameter_count_b, active_parameter_count_b)
- Update main_page.py to display the new visualizations below the leaderboard
- Update mock data with release dates and add sample open-weights models with parameter counts

These visualizations will display data once the openhands-index-results PR with release_date and parameter_count fields is merged.

Co-authored-by: openhands <openhands@all-hands.dev>
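
For reference, the three new optional fields are read straight from a result directory's metadata.json. A minimal sketch (the load_model_metadata helper below is hypothetical; the real loading lives in simple_data_loader.py, diffed further down):

import json

def load_model_metadata(path: str) -> dict:
    # Hypothetical reader mirroring the .get() defaults used in simple_data_loader.py
    with open(path) as f:
        metadata = json.load(f)
    return {
        'release_date': metadata.get('release_date', ''),            # 'YYYY-MM-DD', empty if unknown
        'parameter_count_b': metadata.get('parameter_count_b'),      # total params in billions (open models)
        'active_parameter_count_b': metadata.get('active_parameter_count_b'),  # active params for MoE
    }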

main_page.py CHANGED
@@ -11,6 +11,11 @@ from content import (
     INTRO_PARAGRAPH
 )
 
+from visualizations import (
+    create_evolution_over_time_chart,
+    create_accuracy_by_size_chart
+)
+
 # --- Global State for Viewers (simple caching) ---
 CACHED_VIEWERS = {}
 CACHED_TAG_MAPS = {}
@@ -33,6 +38,26 @@ def build_page():
             category_name=CATEGORY_NAME,
             split_name="test"
         )
+
+        # --- New Visualization Sections ---
+        gr.Markdown("---")
+
+        # Evolution Over Time Section
+        gr.HTML('<h2>Evolution Over Time</h2>', elem_id="evolution-header")
+        gr.Markdown("Track how model performance has improved over time based on release dates.")
+
+        evolution_fig = create_evolution_over_time_chart(test_df)
+        gr.Plot(value=evolution_fig, elem_id="evolution-chart")
+
+        gr.Markdown("---")
+
+        # Open Model Accuracy by Size Section
+        gr.HTML('<h2>Open Model Accuracy by Size</h2>', elem_id="size-accuracy-header")
+        gr.Markdown("Compare open-weights model performance against their parameter count. Marker size indicates total parameters; x-axis shows active parameters (relevant for MoE models).")
+
+        size_fig = create_accuracy_by_size_chart(test_df)
+        gr.Plot(value=size_fig, elem_id="size-accuracy-chart")
+
     else:
         gr.Markdown("No data available.")
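For anyone wiring these charts outside the leaderboard page, a minimal standalone sketch (assumes a test_df carrying the columns produced by simple_data_loader.py; build_demo and the launch call are illustrative, not part of this commit):

import gradio as gr
from visualizations import (
    create_evolution_over_time_chart,
    create_accuracy_by_size_chart
)

def build_demo(test_df):
    # Mirrors the layout added above: header, then the plot
    with gr.Blocks() as demo:
        gr.HTML('<h2>Evolution Over Time</h2>')
        gr.Plot(value=create_evolution_over_time_chart(test_df))
        gr.HTML('<h2>Open Model Accuracy by Size</h2>')
        gr.Plot(value=create_accuracy_by_size_chart(test_df))
    return demo

# build_demo(test_df).launch()
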
mock_results/1.0.0-dev1/results/20250723_qwen3_coder/metadata.json ADDED
@@ -0,0 +1,11 @@
+{
+    "agent_version": "1.0.2",
+    "model": "qwen-3-coder",
+    "openness": "open_weights",
+    "tool_usage": "standard",
+    "submission_time": "2025-07-23T10:00:00.000000",
+    "directory_name": "20250723_qwen3_coder",
+    "release_date": "2025-07-23",
+    "parameter_count_b": 480,
+    "active_parameter_count_b": 35
+}
mock_results/1.0.0-dev1/results/20250723_qwen3_coder/scores.json ADDED
@@ -0,0 +1,16 @@
+[
+    {
+        "benchmark": "swe-bench",
+        "score": 38.0,
+        "metric": "resolve_rate",
+        "cost_per_instance": 0.12,
+        "average_runtime": 150
+    },
+    {
+        "benchmark": "gaia",
+        "score": 48.0,
+        "metric": "accuracy",
+        "cost_per_instance": 0.06,
+        "average_runtime": 45
+    }
+]
mock_results/1.0.0-dev1/results/20251106_kimi_k2_thinking/metadata.json ADDED
@@ -0,0 +1,11 @@
+{
+    "agent_version": "1.0.2",
+    "model": "kimi-k2-thinking",
+    "openness": "open_weights",
+    "tool_usage": "standard",
+    "submission_time": "2025-11-06T10:00:00.000000",
+    "directory_name": "20251106_kimi_k2_thinking",
+    "release_date": "2025-11-06",
+    "parameter_count_b": 1000,
+    "active_parameter_count_b": 32
+}
mock_results/1.0.0-dev1/results/20251106_kimi_k2_thinking/scores.json ADDED
@@ -0,0 +1,16 @@
+[
+    {
+        "benchmark": "swe-bench",
+        "score": 45.0,
+        "metric": "resolve_rate",
+        "cost_per_instance": 0.18,
+        "average_runtime": 200
+    },
+    {
+        "benchmark": "gaia",
+        "score": 52.0,
+        "metric": "accuracy",
+        "cost_per_instance": 0.10,
+        "average_runtime": 70
+    }
+]
mock_results/1.0.0-dev1/results/20251124_claude_3_5_sonnet_20241022/metadata.json CHANGED
@@ -4,5 +4,6 @@
     "openness": "closed_api_available",
     "tool_usage": "standard",
     "submission_time": "2025-11-24T19:56:00.092865",
-    "directory_name": "20251124_claude_3_5_sonnet_20241022"
-}
+    "directory_name": "20251124_claude_3_5_sonnet_20241022",
+    "release_date": "2024-10-22"
+}
mock_results/1.0.0-dev1/results/20251124_claude_3_opus_20240229/metadata.json CHANGED
@@ -4,5 +4,6 @@
     "openness": "closed_api_available",
     "tool_usage": "custom_interface",
     "submission_time": "2025-11-24T19:56:00.092922",
-    "directory_name": "20251124_claude_3_opus_20240229"
-}
+    "directory_name": "20251124_claude_3_opus_20240229",
+    "release_date": "2024-02-29"
+}
mock_results/1.0.0-dev1/results/20251124_gpt_4_turbo_2024_04_09/metadata.json CHANGED
@@ -4,5 +4,6 @@
     "openness": "closed_api_available",
     "tool_usage": "standard",
     "submission_time": "2025-11-24T19:56:00.092908",
-    "directory_name": "20251124_gpt_4_turbo_2024_04_09"
-}
+    "directory_name": "20251124_gpt_4_turbo_2024_04_09",
+    "release_date": "2024-04-09"
+}
mock_results/1.0.0-dev1/results/20251124_gpt_4o_2024_11_20/metadata.json CHANGED
@@ -4,5 +4,6 @@
     "openness": "closed_api_available",
     "tool_usage": "standard",
     "submission_time": "2025-11-24T19:56:00.092895",
-    "directory_name": "20251124_gpt_4o_2024_11_20"
-}
+    "directory_name": "20251124_gpt_4o_2024_11_20",
+    "release_date": "2024-11-20"
+}
mock_results/1.0.0-dev1/results/20251124_gpt_4o_mini_2024_07_18/metadata.json CHANGED
@@ -4,5 +4,6 @@
     "openness": "closed_api_available",
     "tool_usage": "standard",
    "submission_time": "2025-11-24T19:56:00.092916",
-    "directory_name": "20251124_gpt_4o_mini_2024_07_18"
-}
+    "directory_name": "20251124_gpt_4o_mini_2024_07_18",
+    "release_date": "2024-07-18"
+}
mock_results/1.0.0-dev1/results/20251201_deepseek_v3/metadata.json ADDED
@@ -0,0 +1,10 @@
+{
+    "agent_version": "1.0.2",
+    "model": "deepseek-v3",
+    "openness": "open_weights",
+    "tool_usage": "standard",
+    "submission_time": "2025-12-01T10:00:00.000000",
+    "directory_name": "20251201_deepseek_v3",
+    "release_date": "2025-12-01",
+    "parameter_count_b": 685
+}
mock_results/1.0.0-dev1/results/20251201_deepseek_v3/scores.json ADDED
@@ -0,0 +1,16 @@
+[
+    {
+        "benchmark": "swe-bench",
+        "score": 42.5,
+        "metric": "resolve_rate",
+        "cost_per_instance": 0.15,
+        "average_runtime": 180
+    },
+    {
+        "benchmark": "gaia",
+        "score": 55.0,
+        "metric": "accuracy",
+        "cost_per_instance": 0.08,
+        "average_runtime": 60
+    }
+]
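
Each scores.json holds one entry per benchmark, while the charts read a single 'average score' column downstream. As a sanity check, an unweighted mean over the mock entries (the unweighted mean is an illustrative assumption; the leaderboard's real aggregation lives in the data pipeline):

import json

def average_score(scores_path: str) -> float:
    # Unweighted mean across benchmark entries (illustrative assumption)
    with open(scores_path) as f:
        entries = json.load(f)
    return sum(entry['score'] for entry in entries) / len(entries)

# For the deepseek-v3 mock above: (42.5 + 55.0) / 2 == 48.75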
simple_data_loader.py CHANGED
@@ -194,6 +194,9 @@ class SimpleLeaderboardViewer:
             'llm_base': metadata.get('model', 'unknown'),
             'openness': metadata.get('openness', 'unknown'),
             'submission_time': metadata.get('submission_time', ''),
+            'release_date': metadata.get('release_date', ''),  # Model release date
+            'parameter_count_b': metadata.get('parameter_count_b'),  # Total params in billions
+            'active_parameter_count_b': metadata.get('active_parameter_count_b'),  # Active params for MoE
             'score': score_entry.get('score'),
             'metric': score_entry.get('metric', 'unknown'),
             'cost_per_instance': score_entry.get('cost_per_instance'),
@@ -257,6 +260,10 @@ class SimpleLeaderboardViewer:
             'Language model': first_record['llm_base'],  # Will become "Language Model"
             'openness': normalized_openness,  # Will become "Openness" (simplified to "open" or "closed")
             'date': first_record['submission_time'],  # Will become "Date"
+            # Model metadata for visualizations
+            'release_date': first_record.get('release_date', ''),  # Model release date
+            'parameter_count_b': first_record.get('parameter_count_b'),  # Total params in billions
+            'active_parameter_count_b': first_record.get('active_parameter_count_b'),  # Active params for MoE
             # Additional columns expected by the transformer
             # Use agent_id (version_model) as unique identifier for Pareto frontier calculation
             'id': agent_id,
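
Because every new field is read with .get() and a permissive default, older submissions without the new metadata still flow through the loader; the charts then filter them out rather than crash. A quick illustrative check:

import pandas as pd

# Row shape for a legacy submission missing the new fields
record = {'release_date': '', 'parameter_count_b': None, 'active_parameter_count_b': None}
df = pd.DataFrame([record])

assert (df['release_date'] == '').all()        # excluded by the evolution chart's filter
assert df['parameter_count_b'].isna().all()    # excluded by the size chart's filter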
visualizations.py ADDED
@@ -0,0 +1,311 @@
+"""
+Additional visualizations for the OpenHands Index leaderboard.
+"""
+import pandas as pd
+import plotly.graph_objects as go
+import aliases
+
+
+def create_evolution_over_time_chart(df: pd.DataFrame) -> go.Figure:
+    """
+    Create a line chart showing model performance evolution over release dates.
+
+    Args:
+        df: DataFrame with columns including 'release_date', 'Language Model',
+            'average score', 'openness'
+
+    Returns:
+        Plotly figure showing score evolution over time
+    """
+    if df.empty or 'release_date' not in df.columns:
+        fig = go.Figure()
+        fig.add_annotation(
+            text="No release date data available",
+            xref="paper", yref="paper",
+            x=0.5, y=0.5, showarrow=False,
+            font=dict(size=16)
+        )
+        return fig
+
+    # Filter out rows without release dates
+    plot_df = df[df['release_date'].notna() & (df['release_date'] != '')].copy()
+
+    if plot_df.empty:
+        fig = go.Figure()
+        fig.add_annotation(
+            text="No release date data available",
+            xref="paper", yref="paper",
+            x=0.5, y=0.5, showarrow=False,
+            font=dict(size=16)
+        )
+        return fig
+
+    # Convert release_date to datetime and drop unparseable values
+    plot_df['release_date'] = pd.to_datetime(plot_df['release_date'], errors='coerce')
+    plot_df = plot_df.dropna(subset=['release_date'])
+
+    # Sort by release date
+    plot_df = plot_df.sort_values('release_date')
+
+    # Get the score column
+    score_col = 'average score' if 'average score' in plot_df.columns else None
+    if score_col is None:
+        for col in plot_df.columns:
+            if 'score' in col.lower() and 'average' in col.lower():
+                score_col = col
+                break
+
+    if score_col is None:
+        fig = go.Figure()
+        fig.add_annotation(
+            text="No score data available",
+            xref="paper", yref="paper",
+            x=0.5, y=0.5, showarrow=False,
+            font=dict(size=16)
+        )
+        return fig
+
+    # Get model name column
+    model_col = 'Language Model' if 'Language Model' in plot_df.columns else 'Language model'
+
+    # Map openness to colors
+    color_map = {
+        aliases.CANONICAL_OPENNESS_OPEN: "#F0529C",    # Pink for open
+        aliases.CANONICAL_OPENNESS_CLOSED: "#FFD700",  # Yellow/gold for closed
+    }
+    for canonical_openness, openness_aliases in aliases.OPENNESS_ALIASES.items():
+        for openness_alias in openness_aliases:
+            color_map[openness_alias] = color_map[canonical_openness]
+
+    fig = go.Figure()
+
+    # Add scatter points for each model
+    for _, row in plot_df.iterrows():
+        openness = row.get('Openness', row.get('openness', 'unknown'))
+        color = color_map.get(openness, '#888888')
+        model_name = row.get(model_col, 'Unknown')
+
+        fig.add_trace(go.Scatter(
+            x=[row['release_date']],
+            y=[row[score_col]],
+            mode='markers+text',
+            marker=dict(
+                size=12,
+                color=color,
+                line=dict(width=1, color='#333333')
+            ),
+            text=[model_name],
+            textposition='top center',
+            textfont=dict(size=10),
+            name=model_name,
+            hovertemplate=(
+                f"<b>{model_name}</b><br>"
+                f"Release: %{{x|%Y-%m-%d}}<br>"
+                f"Score: %{{y:.1f}}<br>"
+                f"<extra></extra>"
+            ),
+            showlegend=False
+        ))
+
+    # Add trend line
+    if len(plot_df) > 1:
+        fig.add_trace(go.Scatter(
+            x=plot_df['release_date'],
+            y=plot_df[score_col],
+            mode='lines',
+            line=dict(color='#0FCB8C', width=2, dash='dash'),
+            name='Trend',
+            hoverinfo='skip',
+            showlegend=False
+        ))
+
+    # Update layout
+    fig.update_layout(
+        title=dict(
+            text="Model Performance Evolution Over Time",
+            font=dict(size=18)
+        ),
+        xaxis=dict(
+            title="Model Release Date",
+            showgrid=True,
+            gridcolor='rgba(128,128,128,0.2)'
+        ),
+        yaxis=dict(
+            title="Average Score",
+            showgrid=True,
+            gridcolor='rgba(128,128,128,0.2)'
+        ),
+        plot_bgcolor='rgba(0,0,0,0)',
+        paper_bgcolor='rgba(0,0,0,0)',
+        hovermode='closest',
+        margin=dict(l=60, r=40, t=60, b=60),
+        height=400
+    )
+
+    # Add legend for openness
+    fig.add_trace(go.Scatter(
+        x=[None], y=[None],
+        mode='markers',
+        marker=dict(size=10, color='#F0529C'),
+        name='Open Weights',
+        showlegend=True
+    ))
+    fig.add_trace(go.Scatter(
+        x=[None], y=[None],
+        mode='markers',
+        marker=dict(size=10, color='#FFD700'),
+        name='Closed',
+        showlegend=True
+    ))
+
+    fig.update_layout(
+        legend=dict(
+            orientation="h",
+            yanchor="bottom",
+            y=1.02,
+            xanchor="right",
+            x=1
+        )
+    )
+
+    return fig
+
+
+def create_accuracy_by_size_chart(df: pd.DataFrame) -> go.Figure:
+    """
+    Create a scatter plot showing accuracy vs parameter count for open-weights models.
+
+    Args:
+        df: DataFrame with columns including 'parameter_count_b', 'active_parameter_count_b',
+            'average score', 'openness', 'Language Model'
+
+    Returns:
+        Plotly figure showing accuracy vs model size
+    """
+    if df.empty or 'parameter_count_b' not in df.columns:
+        fig = go.Figure()
+        fig.add_annotation(
+            text="No parameter count data available",
+            xref="paper", yref="paper",
+            x=0.5, y=0.5, showarrow=False,
+            font=dict(size=16)
+        )
+        return fig
+
+    # Filter to only open-weights models with parameter data
+    open_aliases = [aliases.CANONICAL_OPENNESS_OPEN] + list(
+        aliases.OPENNESS_ALIASES.get(aliases.CANONICAL_OPENNESS_OPEN, [])
+    )
+    openness_col = 'Openness' if 'Openness' in df.columns else 'openness'
+    plot_df = df[
+        df['parameter_count_b'].notna() &
+        df[openness_col].isin(open_aliases)
+    ].copy()
+
+    if plot_df.empty:
+        fig = go.Figure()
+        fig.add_annotation(
+            text="No open-weights models with parameter data available",
+            xref="paper", yref="paper",
+            x=0.5, y=0.5, showarrow=False,
+            font=dict(size=16)
+        )
+        return fig
+
+    # Get the score column
+    score_col = 'average score' if 'average score' in plot_df.columns else None
+    if score_col is None:
+        for col in plot_df.columns:
+            if 'score' in col.lower() and 'average' in col.lower():
+                score_col = col
+                break
+
+    if score_col is None:
+        fig = go.Figure()
+        fig.add_annotation(
+            text="No score data available",
+            xref="paper", yref="paper",
+            x=0.5, y=0.5, showarrow=False,
+            font=dict(size=16)
+        )
+        return fig
+
+    # Get model name column
+    model_col = 'Language Model' if 'Language Model' in plot_df.columns else 'Language model'
+
+    fig = go.Figure()
+
+    # Use active params for the x-axis if available (more meaningful for MoE),
+    # otherwise fall back to total params
+    for _, row in plot_df.iterrows():
+        total_params = row['parameter_count_b']
+        active_params = row.get('active_parameter_count_b')
+        model_name = row.get(model_col, 'Unknown')
+        score = row[score_col]
+
+        x_val = active_params if pd.notna(active_params) else total_params
+
+        # Create hover text
+        hover_text = f"<b>{model_name}</b><br>"
+        hover_text += f"Total Params: {total_params:.0f}B<br>"
+        if pd.notna(active_params):
+            hover_text += f"Active Params: {active_params:.0f}B<br>"
+        hover_text += f"Score: {score:.1f}<br>"
+
+        # Marker size scales with total params (larger models = larger markers), capped at 30
+        marker_size = min(10 + (total_params / 100), 30)
+
+        fig.add_trace(go.Scatter(
+            x=[x_val],
+            y=[score],
+            mode='markers+text',
+            marker=dict(
+                size=marker_size,
+                color='#F0529C',  # Pink for open models
+                line=dict(width=1, color='#333333'),
+                opacity=0.8
+            ),
+            text=[model_name],
+            textposition='top center',
+            textfont=dict(size=10),
+            name=model_name,
+            hovertemplate=hover_text + "<extra></extra>",
+            showlegend=False
+        ))
+
+    # Update layout
+    fig.update_layout(
+        title=dict(
+            text="Open Model Accuracy by Size",
+            font=dict(size=18)
+        ),
+        xaxis=dict(
+            title="Active Parameters (Billions)",
+            showgrid=True,
+            gridcolor='rgba(128,128,128,0.2)',
+            type='log'  # Log scale for better visualization
+        ),
+        yaxis=dict(
+            title="Average Score",
+            showgrid=True,
+            gridcolor='rgba(128,128,128,0.2)'
+        ),
+        plot_bgcolor='rgba(0,0,0,0)',
+        paper_bgcolor='rgba(0,0,0,0)',
+        hovermode='closest',
+        margin=dict(l=60, r=40, t=60, b=60),
+        height=400
+    )
+
+    # Add annotation explaining marker size
+    fig.add_annotation(
+        text="Marker size indicates total parameter count",
+        xref="paper", yref="paper",
+        x=0.02, y=-0.12,
+        showarrow=False,
+        font=dict(size=10, color='gray'),
+        align='left'
+    )
+
+    return fig
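
A hedged usage example for local testing of both charts (the openness strings assume the repo's aliases module treats 'open_weights' as the open canonical form; if it does not, the size chart simply renders its empty-data annotation):

import pandas as pd
from visualizations import (
    create_evolution_over_time_chart,
    create_accuracy_by_size_chart
)

df = pd.DataFrame([
    {'Language Model': 'qwen-3-coder', 'Openness': 'open_weights',
     'average score': 43.0, 'release_date': '2025-07-23',
     'parameter_count_b': 480, 'active_parameter_count_b': 35},
    {'Language Model': 'gpt-4o', 'Openness': 'closed_api_available',
     'average score': 50.0, 'release_date': '2024-11-20',
     'parameter_count_b': None, 'active_parameter_count_b': None},
])

create_evolution_over_time_chart(df).write_html('evolution.html')
create_accuracy_by_size_chart(df).write_html('size_vs_accuracy.html')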