Spaces:
Running
Running
openhands
committed on
Commit
·
3ad0e27
1
Parent(s):
74f33aa
Update 'Open Model Accuracy by Size' plot to match Cost/Performance style
Browse files
- Add Pareto efficiency frontier line (dashed yellow)
- Use uniform logo sizes instead of parameter-based scaling
- Show model name labels only for frontier points
- Remove 'logo size indicates parameter count' annotation
- Update description text to remove marker size reference
- main_page.py +1 -1
- visualizations.py +75 -50
main_page.py
CHANGED
|
@@ -65,7 +65,7 @@ def build_page():
|
|
| 65 |
|
| 66 |
# Open Model Accuracy by Size Section
|
| 67 |
gr.HTML('<h2>Open Model Accuracy by Size</h2>', elem_id="size-accuracy-header")
|
| 68 |
-
gr.Markdown("Compare open-weights model performance against their parameter count.
|
| 69 |
|
| 70 |
size_fig = create_accuracy_by_size_chart(test_df)
|
| 71 |
gr.Plot(value=size_fig, elem_id="size-accuracy-chart")
|
|
|
|
| 65 |
|
| 66 |
# Open Model Accuracy by Size Section
|
| 67 |
gr.HTML('<h2>Open Model Accuracy by Size</h2>', elem_id="size-accuracy-header")
|
| 68 |
+
gr.Markdown("Compare open-weights model performance against their parameter count. The x-axis shows active parameters (relevant for MoE models).")
|
| 69 |
|
| 70 |
size_fig = create_accuracy_by_size_chart(test_df)
|
| 71 |
gr.Plot(value=size_fig, elem_id="size-accuracy-chart")
|
visualizations.py
CHANGED
|
@@ -343,7 +343,8 @@ def create_evolution_over_time_chart(df: pd.DataFrame) -> go.Figure:
|
|
| 343 |
def create_accuracy_by_size_chart(df: pd.DataFrame) -> go.Figure:
|
| 344 |
"""
|
| 345 |
Create a scatter plot showing accuracy vs parameter count for open-weights models.
|
| 346 |
-
Uses company logos as markers to match the
|
|
|
|
| 347 |
|
| 348 |
Args:
|
| 349 |
df: DataFrame with columns including 'parameter_count_b' or 'Parameter_Count_B',
|
|
@@ -436,12 +437,7 @@ def create_accuracy_by_size_chart(df: pd.DataFrame) -> go.Figure:
|
|
| 436 |
fig = go.Figure()
|
| 437 |
|
| 438 |
# Prepare data for plotting
|
| 439 |
-
|
| 440 |
-
y_values = []
|
| 441 |
-
hover_texts = []
|
| 442 |
-
model_names = []
|
| 443 |
-
total_params_list = []
|
| 444 |
-
|
| 445 |
for _, row in plot_df.iterrows():
|
| 446 |
total_params = row[param_col]
|
| 447 |
active_params = row.get(active_param_col) if active_param_col else None
|
|
@@ -451,11 +447,6 @@ def create_accuracy_by_size_chart(df: pd.DataFrame) -> go.Figure:
|
|
| 451 |
# Use active params for x-axis if available (more meaningful for MoE)
|
| 452 |
x_val = active_params if pd.notna(active_params) else total_params
|
| 453 |
|
| 454 |
-
x_values.append(x_val)
|
| 455 |
-
y_values.append(score)
|
| 456 |
-
model_names.append(model_name)
|
| 457 |
-
total_params_list.append(total_params)
|
| 458 |
-
|
| 459 |
# Create hover text matching existing chart style
|
| 460 |
h_pad = " "
|
| 461 |
hover_text = f"<br>{h_pad}<b>{model_name}</b>{h_pad}<br>"
|
|
@@ -463,7 +454,17 @@ def create_accuracy_by_size_chart(df: pd.DataFrame) -> go.Figure:
|
|
| 463 |
if pd.notna(active_params):
|
| 464 |
hover_text += f"{h_pad}Active Params: <b>{active_params:.0f}B</b>{h_pad}<br>"
|
| 465 |
hover_text += f"{h_pad}Average Score: <b>{score:.1f}</b>{h_pad}<br>"
|
| 466 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 467 |
|
| 468 |
# Calculate axis ranges for domain coordinate conversion
|
| 469 |
min_x = min(x_values)
|
|
@@ -474,7 +475,33 @@ def create_accuracy_by_size_chart(df: pd.DataFrame) -> go.Figure:
|
|
| 474 |
min_score = min(y_values)
|
| 475 |
max_score = max(y_values)
|
| 476 |
y_min = min_score - 5 if min_score > 5 else 0
|
| 477 |
-
y_max = max_score +
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 478 |
|
| 479 |
# Add invisible markers for hover functionality
|
| 480 |
fig.add_trace(go.Scatter(
|
|
@@ -483,7 +510,7 @@ def create_accuracy_by_size_chart(df: pd.DataFrame) -> go.Figure:
|
|
| 483 |
mode='markers',
|
| 484 |
name='Models',
|
| 485 |
showlegend=False,
|
| 486 |
-
text=
|
| 487 |
hoverinfo='text',
|
| 488 |
marker=dict(
|
| 489 |
color='rgba(0,0,0,0)', # Invisible markers
|
|
@@ -492,11 +519,14 @@ def create_accuracy_by_size_chart(df: pd.DataFrame) -> go.Figure:
|
|
| 492 |
)
|
| 493 |
))
|
| 494 |
|
| 495 |
-
# Add company logo images for each data point
|
| 496 |
layout_images = []
|
| 497 |
-
frontier_labels_data = []
|
| 498 |
|
| 499 |
-
for
|
|
|
|
|
|
|
|
|
|
|
|
|
| 500 |
company_info = get_company_from_model(model_name)
|
| 501 |
logo_path = company_info['path']
|
| 502 |
|
|
@@ -517,43 +547,48 @@ def create_accuracy_by_size_chart(df: pd.DataFrame) -> go.Figure:
|
|
| 517 |
domain_y = (y_val - y_min) / (y_max - y_min) if (y_max - y_min) > 0 else 0.5
|
| 518 |
|
| 519 |
# Clamp to valid range
|
| 520 |
-
domain_x = max(0
|
| 521 |
-
domain_y = max(0
|
| 522 |
-
|
| 523 |
-
# Scale logo size based on total params
|
| 524 |
-
size_scale = 0.03 + (total_params / 2000) # Larger models = larger logos
|
| 525 |
-
size_scale = min(size_scale, 0.06) # Cap size
|
| 526 |
|
|
|
|
| 527 |
layout_images.append(dict(
|
| 528 |
source=logo_uri,
|
| 529 |
xref="x domain",
|
| 530 |
yref="y domain",
|
| 531 |
x=domain_x,
|
| 532 |
y=domain_y,
|
| 533 |
-
sizex=
|
| 534 |
-
sizey=
|
| 535 |
xanchor="center",
|
| 536 |
yanchor="middle",
|
| 537 |
layer="above"
|
| 538 |
))
|
| 539 |
-
|
| 540 |
-
# Store label data for annotation
|
| 541 |
-
frontier_labels_data.append({
|
| 542 |
-
'x': domain_x,
|
| 543 |
-
'y': domain_y,
|
| 544 |
-
'label': model_name
|
| 545 |
-
})
|
| 546 |
except Exception:
|
| 547 |
pass
|
| 548 |
|
| 549 |
-
# Add model name labels
|
| 550 |
-
for
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 551 |
fig.add_annotation(
|
| 552 |
-
x=
|
| 553 |
-
y=
|
| 554 |
-
|
| 555 |
-
yref="y domain",
|
| 556 |
-
text=item['label'],
|
| 557 |
showarrow=False,
|
| 558 |
yshift=25,
|
| 559 |
font=STANDARD_FONT,
|
|
@@ -582,16 +617,6 @@ def create_accuracy_by_size_chart(df: pd.DataFrame) -> go.Figure:
|
|
| 582 |
|
| 583 |
fig.update_layout(**layout_config)
|
| 584 |
|
| 585 |
-
# Add annotation explaining marker size
|
| 586 |
-
fig.add_annotation(
|
| 587 |
-
text="Logo size indicates total parameter count",
|
| 588 |
-
xref="paper", yref="paper",
|
| 589 |
-
x=0.02, y=-0.08,
|
| 590 |
-
showarrow=False,
|
| 591 |
-
font=STANDARD_FONT,
|
| 592 |
-
align='left'
|
| 593 |
-
)
|
| 594 |
-
|
| 595 |
# Add OpenHands branding
|
| 596 |
add_branding_to_figure(fig)
|
| 597 |
|
|
|
|
| 343 |
def create_accuracy_by_size_chart(df: pd.DataFrame) -> go.Figure:
|
| 344 |
"""
|
| 345 |
Create a scatter plot showing accuracy vs parameter count for open-weights models.
|
| 346 |
+
Uses company logos as markers to match the Cost/Performance chart styling.
|
| 347 |
+
Includes a Pareto efficiency frontier line.
|
| 348 |
|
| 349 |
Args:
|
| 350 |
df: DataFrame with columns including 'parameter_count_b' or 'Parameter_Count_B',
|
|
|
|
| 437 |
fig = go.Figure()
|
| 438 |
|
| 439 |
# Prepare data for plotting
|
| 440 |
+
data_points = []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 441 |
for _, row in plot_df.iterrows():
|
| 442 |
total_params = row[param_col]
|
| 443 |
active_params = row.get(active_param_col) if active_param_col else None
|
|
|
|
| 447 |
# Use active params for x-axis if available (more meaningful for MoE)
|
| 448 |
x_val = active_params if pd.notna(active_params) else total_params
|
| 449 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 450 |
# Create hover text matching existing chart style
|
| 451 |
h_pad = " "
|
| 452 |
hover_text = f"<br>{h_pad}<b>{model_name}</b>{h_pad}<br>"
|
|
|
|
| 454 |
if pd.notna(active_params):
|
| 455 |
hover_text += f"{h_pad}Active Params: <b>{active_params:.0f}B</b>{h_pad}<br>"
|
| 456 |
hover_text += f"{h_pad}Average Score: <b>{score:.1f}</b>{h_pad}<br>"
|
| 457 |
+
|
| 458 |
+
data_points.append({
|
| 459 |
+
'x': x_val,
|
| 460 |
+
'y': score,
|
| 461 |
+
'model_name': model_name,
|
| 462 |
+
'hover_text': hover_text,
|
| 463 |
+
'total_params': total_params
|
| 464 |
+
})
|
| 465 |
+
|
| 466 |
+
x_values = [p['x'] for p in data_points]
|
| 467 |
+
y_values = [p['y'] for p in data_points]
|
| 468 |
|
| 469 |
# Calculate axis ranges for domain coordinate conversion
|
| 470 |
min_x = min(x_values)
|
|
|
|
| 475 |
min_score = min(y_values)
|
| 476 |
max_score = max(y_values)
|
| 477 |
y_min = min_score - 5 if min_score > 5 else 0
|
| 478 |
+
y_max = max_score + 5
|
| 479 |
+
|
| 480 |
+
# Calculate and draw Pareto Efficiency Frontier
|
| 481 |
+
# For size vs accuracy, we want: smaller size (lower x) AND higher accuracy (higher y)
|
| 482 |
+
# Sort by x ascending, then track maximum y seen
|
| 483 |
+
sorted_data = sorted(data_points, key=lambda p: (p['x'], -p['y']))
|
| 484 |
+
frontier_points = []
|
| 485 |
+
frontier_rows = []
|
| 486 |
+
max_score_so_far = float('-inf')
|
| 487 |
+
|
| 488 |
+
for point in sorted_data:
|
| 489 |
+
if point['y'] >= max_score_so_far:
|
| 490 |
+
frontier_points.append({'x': point['x'], 'y': point['y']})
|
| 491 |
+
frontier_rows.append(point)
|
| 492 |
+
max_score_so_far = point['y']
|
| 493 |
+
|
| 494 |
+
if frontier_points:
|
| 495 |
+
frontier_df = pd.DataFrame(frontier_points)
|
| 496 |
+
fig.add_trace(go.Scatter(
|
| 497 |
+
x=frontier_df['x'],
|
| 498 |
+
y=frontier_df['y'],
|
| 499 |
+
mode='lines',
|
| 500 |
+
name='Efficiency Frontier',
|
| 501 |
+
showlegend=False,
|
| 502 |
+
line=dict(color='#FFE165', width=2, dash='dash'), # primary yellow
|
| 503 |
+
hoverinfo='skip'
|
| 504 |
+
))
|
| 505 |
|
| 506 |
# Add invisible markers for hover functionality
|
| 507 |
fig.add_trace(go.Scatter(
|
|
|
|
| 510 |
mode='markers',
|
| 511 |
name='Models',
|
| 512 |
showlegend=False,
|
| 513 |
+
text=[p['hover_text'] for p in data_points],
|
| 514 |
hoverinfo='text',
|
| 515 |
marker=dict(
|
| 516 |
color='rgba(0,0,0,0)', # Invisible markers
|
|
|
|
| 519 |
)
|
| 520 |
))
|
| 521 |
|
| 522 |
+
# Add company logo images for each data point (uniform size like Cost/Performance chart)
|
| 523 |
layout_images = []
|
|
|
|
| 524 |
|
| 525 |
+
for point in data_points:
|
| 526 |
+
x_val = point['x']
|
| 527 |
+
y_val = point['y']
|
| 528 |
+
model_name = point['model_name']
|
| 529 |
+
|
| 530 |
company_info = get_company_from_model(model_name)
|
| 531 |
logo_path = company_info['path']
|
| 532 |
|
|
|
|
| 547 |
domain_y = (y_val - y_min) / (y_max - y_min) if (y_max - y_min) > 0 else 0.5
|
| 548 |
|
| 549 |
# Clamp to valid range
|
| 550 |
+
domain_x = max(0, min(1, domain_x))
|
| 551 |
+
domain_y = max(0, min(1, domain_y))
|
|
|
|
|
|
|
|
|
|
|
|
|
| 552 |
|
| 553 |
+
# Uniform logo size (same as Cost/Performance chart)
|
| 554 |
layout_images.append(dict(
|
| 555 |
source=logo_uri,
|
| 556 |
xref="x domain",
|
| 557 |
yref="y domain",
|
| 558 |
x=domain_x,
|
| 559 |
y=domain_y,
|
| 560 |
+
sizex=0.04, # Size as fraction of plot width
|
| 561 |
+
sizey=0.06, # Size as fraction of plot height
|
| 562 |
xanchor="center",
|
| 563 |
yanchor="middle",
|
| 564 |
layer="above"
|
| 565 |
))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 566 |
except Exception:
|
| 567 |
pass
|
| 568 |
|
| 569 |
+
# Add model name labels for frontier points only (like Cost/Performance chart)
|
| 570 |
+
for point in frontier_rows:
|
| 571 |
+
x_val = point['x']
|
| 572 |
+
y_val = point['y']
|
| 573 |
+
model_name = point['model_name']
|
| 574 |
+
|
| 575 |
+
# Clean model name for label
|
| 576 |
+
if isinstance(model_name, list):
|
| 577 |
+
model_name = model_name[0] if model_name else ''
|
| 578 |
+
model_name = str(model_name).split('/')[-1]
|
| 579 |
+
if len(model_name) > 25:
|
| 580 |
+
model_name = model_name[:22] + '...'
|
| 581 |
+
|
| 582 |
+
# Transform x to log10 for annotation positioning on log scale
|
| 583 |
+
if x_val > 0:
|
| 584 |
+
x_log = np.log10(x_val)
|
| 585 |
+
else:
|
| 586 |
+
x_log = x_min_log
|
| 587 |
+
|
| 588 |
fig.add_annotation(
|
| 589 |
+
x=x_log,
|
| 590 |
+
y=y_val,
|
| 591 |
+
text=model_name,
|
|
|
|
|
|
|
| 592 |
showarrow=False,
|
| 593 |
yshift=25,
|
| 594 |
font=STANDARD_FONT,
|
|
|
|
| 617 |
|
| 618 |
fig.update_layout(**layout_config)
|
| 619 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 620 |
# Add OpenHands branding
|
| 621 |
add_branding_to_figure(fig)
|
| 622 |
|