Spaces:

OpenHands
/

openhands-index

Running

openhands openhands committed on Jan 27

Commit

1f75b60

1 Parent(s): f1798d2

Refactor: Create generic create_scatter_chart() as single source of truth

Major refactoring to eliminate code duplication across scatter plots:

- Add create_scatter_chart() in leaderboard_transformer.py (~270 lines)
- Handles all scatter plot types: cost, runtime, date, params
- Configurable x-axis type (log or date)
- Configurable Pareto frontier direction
- Consistent marker icons, hover text, and styling
- Auto-detects column names

- Add STANDARD_LAYOUT and STANDARD_FONT constants for shared styling

- Simplify visualizations.py from 536 lines to 159 lines
- create_evolution_over_time_chart() now uses generic function
- create_accuracy_by_size_chart() now uses generic function
- Only contains data filtering and column detection logic

Benefits:
- Single source of truth for all scatter plot styling
- Consistent fonts (Arial) across all charts
- Easier to maintain and extend
- ~375 lines of code removed

Co-authored-by: openhands <openhands@all-hands.dev>

Files changed (2) hide show

leaderboard_transformer.py +303 -0
visualizations.py +55 -468

leaderboard_transformer.py CHANGED Viewed

@@ -241,6 +241,309 @@ def get_marker_icon(model_name: str, openness: str, mark_by: str) -> dict:
         return get_company_from_model(model_name)
 INFORMAL_TO_FORMAL_NAME_MAP = {
     # Short Names
     "lit": "Literature Understanding",

         return get_company_from_model(model_name)
# Shared Plotly layout settings applied to every chart via
# ``fig.update_layout(**STANDARD_LAYOUT, ...)``.
STANDARD_LAYOUT = {
    "template": "plotly_white",
    "height": 572,
    # Base font used for titles, axis labels and tick text.
    "font": {
        "family": FONT_FAMILY,
        "color": "#0D0D0F",  # neutral-950
    },
    # Dark tooltip with light text.
    "hoverlabel": {
        "bgcolor": "#222328",  # neutral-800
        "font_size": 12,
        "font_family": FONT_FAMILY_SHORT,
        "font_color": "#F7F8FB",  # neutral-50
    },
    "legend": {
        "bgcolor": '#F7F8FB',  # neutral-50
    },
    # Extra bottom margin leaves room for the logo and URL.
    "margin": {"b": 80},
}

# Font settings shared by in-plot annotations (point labels, "no data" messages).
STANDARD_FONT = {
    "size": 10,
    "color": '#0D0D0F',  # neutral-950
    "family": FONT_FAMILY_SHORT,
}
def _message_figure(message: str, title: str) -> go.Figure:
    """Return an otherwise empty figure showing ``message`` centred in the plot area."""
    fig = go.Figure()
    fig.add_annotation(
        text=message,
        xref="paper", yref="paper",
        x=0.5, y=0.5, showarrow=False,
        font=STANDARD_FONT
    )
    fig.update_layout(**STANDARD_LAYOUT, title=title)
    return fig


def _pareto_frontier_rows(plot_df: pd.DataFrame, x_col: str, y_col: str,
                          pareto_lower_is_better: bool) -> list:
    """Return the rows that set a running best ``y_col`` when scanning ``x_col`` ascending.

    When ``pareto_lower_is_better`` is True, ties on x are scanned best-score
    first and a row that merely equals the running best is kept; otherwise a
    row must strictly beat the running best to be kept.
    """
    if pareto_lower_is_better:
        ordered = plot_df.sort_values(by=[x_col, y_col], ascending=[True, False])
    else:
        ordered = plot_df.sort_values(by=x_col, ascending=True)

    frontier = []
    best = float('-inf')
    for _, row in ordered.iterrows():
        score = row[y_col]
        on_frontier = score >= best if pareto_lower_is_better else score > best
        if on_frontier:
            frontier.append(row)
            best = score
    return frontier


def _svg_data_uri(path: str):
    """Return the file at ``path`` as a base64 SVG data URI, or None if it cannot be read."""
    if not os.path.exists(path):
        return None
    try:
        with open(path, 'rb') as f:
            encoded = base64.b64encode(f.read()).decode('utf-8')
    except OSError:
        # Icons are decorative: an unreadable file just means no icon is drawn.
        return None
    return f"data:image/svg+xml;base64,{encoded}"


def create_scatter_chart(
    df: pd.DataFrame,
    x_col: str,
    y_col: str,
    title: str,
    x_label: str,
    y_label: str = "Average Score",
    mark_by: str = None,
    x_type: str = "log",  # "log" or "date"
    pareto_lower_is_better: bool = True,  # For x-axis: True means lower x is better
    model_col: str = None,
    openness_col: str = None,
) -> go.Figure:
    """
    Generic scatter chart with Pareto frontier, marker icons, and consistent styling.

    Each data point is drawn as an icon image placed in axis-domain
    coordinates, with an invisible marker underneath that carries the hover
    text. Points on the Pareto frontier are joined by a dashed line and
    labelled with their (shortened) model name.

    Args:
        df: DataFrame with the data to plot. It is not modified.
        x_col: Column name for x-axis values.
        y_col: Column name for y-axis values (typically score).
        title: Chart title.
        x_label: X-axis label, also used in the hover text.
        y_label: Y-axis label (default: "Average Score").
        mark_by: One of "Company", "Openness", or "Country" for marker icons.
            Falls back to ``constants.MARK_BY_DEFAULT`` when None.
        x_type: "log" for logarithmic scale, "date" for datetime scale.
            Any other value leaves the x-axis auto-ranged and places every
            icon at the horizontal centre, because no range is known.
        pareto_lower_is_better: If True, lower x values are better (cost, size);
            if False, higher x values are better (time evolution).
        model_col: Column name for model names (auto-detected if None).
        openness_col: Column name for openness values (auto-detected if None).

    Returns:
        Plotly figure with scatter plot, Pareto frontier, and branding. If the
        required columns are missing, or no row has a valid x and y value, a
        figure containing only an explanatory message is returned instead.
    """
    # Imported at call time, as in the original.
    # NOTE(review): presumably to avoid an import cycle - confirm.
    from constants import MARK_BY_DEFAULT

    if mark_by is None:
        mark_by = MARK_BY_DEFAULT

    # Auto-detect column names if not provided
    if model_col is None:
        model_col = next(
            (col for col in ('Language Model', 'Language model', 'llm_base')
             if col in df.columns),
            'Language Model',
        )
    if openness_col is None:
        openness_col = 'Openness' if 'Openness' in df.columns else 'openness'

    # Ensure required columns exist
    if x_col not in df.columns or y_col not in df.columns:
        return _message_figure("Required data columns not available", title)

    # Work on a copy so the caller's frame is untouched by the type coercion.
    plot_df = df.copy()
    plot_df[y_col] = pd.to_numeric(plot_df[y_col], errors='coerce')
    if x_type == "date":
        plot_df[x_col] = pd.to_datetime(plot_df[x_col], errors='coerce')
    else:
        plot_df[x_col] = pd.to_numeric(plot_df[x_col], errors='coerce')

    # Unparseable values became NaN/NaT above; drop those rows.
    plot_df = plot_df.dropna(subset=[x_col, y_col])
    if plot_df.empty:
        return _message_figure("No valid data points available", title)

    fig = go.Figure()

    # Calculate axis ranges
    x_values = plot_df[x_col].tolist()
    y_values = plot_df[y_col].tolist()
    min_x = min(x_values)
    max_x = max(x_values)

    x_range_log = None
    x_range = None
    if x_type == "log":
        # A log axis range is expressed in log10 units; fall back to fixed
        # bounds when the data contains non-positive values.
        x_range_log = [np.log10(min_x * 0.5) if min_x > 0 else -2,
                       np.log10(max_x * 1.5) if max_x > 0 else 2]
    elif x_type == "date":
        x_padding = (max_x - min_x) * 0.1 if max_x != min_x else pd.Timedelta(days=15)
        x_range = [min_x - x_padding, max_x + x_padding]

    min_y = min(y_values)
    max_y = max(y_values)
    y_range = [min_y - 5 if min_y > 5 else 0, max_y + 5]

    # Calculate and draw the Pareto frontier
    frontier_rows = _pareto_frontier_rows(plot_df, x_col, y_col, pareto_lower_is_better)
    if frontier_rows:
        fig.add_trace(go.Scatter(
            x=[row[x_col] for row in frontier_rows],
            y=[row[y_col] for row in frontier_rows],
            mode='lines',
            name='Pareto Frontier',
            showlegend=False,
            line=dict(color='#FFE165', width=2, dash='dash'),
            hoverinfo='skip'
        ))

    # Prepare hover text for all points
    h_pad = "   "
    hover_texts = []
    for _, row in plot_df.iterrows():
        model_name = row.get(model_col, 'Unknown')
        if isinstance(model_name, list):
            model_name = model_name[0] if model_name else 'Unknown'
        model_name = str(model_name).split('/')[-1]
        # Show dates as YYYY-MM-DD rather than a full timestamp with 00:00:00.
        x_text = row[x_col].strftime('%Y-%m-%d') if x_type == "date" else row[x_col]
        hover_text = f"<br>{h_pad}<b>{model_name}</b>{h_pad}<br>"
        hover_text += f"{h_pad}{x_label}: <b>{x_text}</b>{h_pad}<br>"
        hover_text += f"{h_pad}{y_label}: <b>{row[y_col]:.1f}</b>{h_pad}<br>"
        hover_texts.append(hover_text)

    # Add invisible scatter trace for hover detection
    fig.add_trace(go.Scatter(
        x=plot_df[x_col],
        y=plot_df[y_col],
        mode='markers',
        name='Models',
        showlegend=False,
        text=hover_texts,
        hoverinfo='text',
        marker=dict(color='rgba(0,0,0,0)', size=25, opacity=0)
    ))

    # Add marker icon images. Many rows share an icon, so each file is read
    # and encoded only once.
    icon_cache = {}
    layout_images = []
    y_span = y_range[1] - y_range[0]
    for _, row in plot_df.iterrows():
        x_val = row[x_col]
        y_val = row[y_col]
        marker_info = get_marker_icon(
            row.get(model_col, ''), row.get(openness_col, ''), mark_by
        )
        logo_path = marker_info['path']
        if logo_path not in icon_cache:
            icon_cache[logo_path] = _svg_data_uri(logo_path)
        logo_uri = icon_cache[logo_path]
        if logo_uri is None:
            continue

        # Convert to domain coordinates (0-1 range). These must be measured
        # against the *axis* range (including padding), not the data extent,
        # or the icon will not sit on top of its hover marker.
        if x_type == "log" and x_val > 0:
            x_span = x_range_log[1] - x_range_log[0]
            domain_x = (np.log10(x_val) - x_range_log[0]) / x_span if x_span else 0.5
        elif x_type == "date":
            x_span = (x_range[1] - x_range[0]).total_seconds()
            domain_x = (x_val - x_range[0]).total_seconds() / x_span if x_span else 0.5
        else:
            domain_x = 0.5
        domain_y = (y_val - y_range[0]) / y_span if y_span > 0 else 0.5

        # Clamp to valid range
        domain_x = max(0, min(1, domain_x))
        domain_y = max(0, min(1, domain_y))

        layout_images.append(dict(
            source=logo_uri,
            xref="x domain",
            yref="y domain",
            x=domain_x,
            y=domain_y,
            sizex=0.04,
            sizey=0.06,
            xanchor="center",
            yanchor="middle",
            layer="above"
        ))

    # Add labels for frontier points only
    for row in frontier_rows:
        model_name = row.get(model_col, '')
        if isinstance(model_name, list):
            model_name = model_name[0] if model_name else ''
        model_name = str(model_name).split('/')[-1]
        if len(model_name) > 25:
            model_name = model_name[:22] + '...'
        x_val = row[x_col]
        # For log scale, annotation x needs to be in log space
        if x_type == "log":
            ann_x = np.log10(x_val) if x_val > 0 else 0
        else:
            ann_x = x_val
        fig.add_annotation(
            x=ann_x,
            y=row[y_col],
            text=model_name,
            showarrow=False,
            yshift=20,
            font=STANDARD_FONT,
            xanchor='center',
            yanchor='bottom'
        )

    # Configure layout
    xaxis_config = dict(title=x_label)
    if x_type == "log":
        xaxis_config['type'] = 'log'
        xaxis_config['range'] = x_range_log
    elif x_type == "date":
        xaxis_config['range'] = x_range

    layout_config = dict(
        **STANDARD_LAYOUT,
        title=title,
        xaxis=xaxis_config,
        yaxis=dict(title=y_label, range=y_range),
    )
    if layout_images:
        layout_config['images'] = layout_images
    fig.update_layout(**layout_config)

    # Add branding
    add_branding_to_figure(fig)
    return fig
 INFORMAL_TO_FORMAL_NAME_MAP = {
     # Short Names
     "lit": "Literature Understanding",

visualizations.py CHANGED Viewed

@@ -1,73 +1,38 @@
 """
 Additional visualizations for the OpenHands Index leaderboard.
 """
 import pandas as pd
 import plotly.graph_objects as go
-import plotly.express as px
-from datetime import datetime
-import os
-import base64
 import aliases
-from constants import FONT_FAMILY, FONT_FAMILY_SHORT
-# Import shared utilities from leaderboard_transformer
-from leaderboard_transformer import (
-    get_company_from_model,
-    get_marker_icon,
-    add_branding_to_figure,
-)
-from ui_components import get_svg_as_data_uri
-from constants import MARK_BY_DEFAULT
-# Standard layout configuration matching existing charts
-# Colors aligned with OpenHands brand
-STANDARD_LAYOUT = dict(
-    template="plotly_white",
-    height=572,
-    font=dict(
-        family=FONT_FAMILY,
-        color="#0D0D0F",  # neutral-950
-    ),
-    hoverlabel=dict(
-        bgcolor="#222328",  # neutral-800
-        font_size=12,
-        font_family=FONT_FAMILY_SHORT,
-        font_color="#F7F8FB",  # neutral-50
-    ),
-    legend=dict(
-        bgcolor='#F7F8FB',  # neutral-50
-    ),
-    margin=dict(b=80),  # Extra margin for logo and URL
-)
-# Standard font for annotations - uses constants for consistency
-STANDARD_FONT = dict(
-    size=10,
-    color='#0D0D0F',  # neutral-950
-    family=FONT_FAMILY_SHORT
-)
 def create_evolution_over_time_chart(df: pd.DataFrame, mark_by: str = None) -> go.Figure:
     """
     Create a chart showing model performance evolution over release dates.
-    Uses company logos as markers to match the existing chart styling.
     Args:
-        df: DataFrame with columns including 'release_date' or 'Release_Date', 'Language Model', 'average score', 'openness'
-        mark_by: One of "Company", "Openness", or "Country" - controls which icon to display
     Returns:
         Plotly figure showing score evolution over time
     """
-    if mark_by is None:
-        mark_by = MARK_BY_DEFAULT
-    # Handle different column name formats
-    release_date_col = None
-    for col in ['release_date', 'Release_Date', 'Release Date']:
-        if col in df.columns:
-            release_date_col = col
-            break
     if df.empty or release_date_col is None:
         fig = go.Figure()
@@ -77,38 +42,14 @@ def create_evolution_over_time_chart(df: pd.DataFrame, mark_by: str = None) -> g
             x=0.5, y=0.5, showarrow=False,
             font=STANDARD_FONT
         )
-        fig.update_layout(**STANDARD_LAYOUT)
         return fig
-    # Filter out rows without release dates
-    plot_df = df[df[release_date_col].notna() & (df[release_date_col] != '')].copy()
-    if plot_df.empty:
-        fig = go.Figure()
-        fig.add_annotation(
-            text="No release date data available",
-            xref="paper", yref="paper",
-            x=0.5, y=0.5, showarrow=False,
-            font=STANDARD_FONT
-        )
-        fig.update_layout(**STANDARD_LAYOUT)
-        return fig
-    # Convert release_date to datetime (normalize column name)
-    plot_df['release_date'] = pd.to_datetime(plot_df[release_date_col], errors='coerce')
-    plot_df = plot_df.dropna(subset=['release_date'])
-    # Sort by release date
-    plot_df = plot_df.sort_values('release_date')
-    # Get the score column (handle different naming conventions)
-    score_col = None
-    for col in ['average score', 'Average Score', 'Average score']:
-        if col in plot_df.columns:
-            score_col = col
-            break
     if score_col is None:
-        for col in plot_df.columns:
             if 'score' in col.lower() and 'average' in col.lower():
                 score_col = col
                 break
@@ -121,202 +62,36 @@ def create_evolution_over_time_chart(df: pd.DataFrame, mark_by: str = None) -> g
             x=0.5, y=0.5, showarrow=False,
             font=STANDARD_FONT
         )
-        fig.update_layout(**STANDARD_LAYOUT)
         return fig
-    # Get model name column
-    model_col = None
-    for col in ['Language Model', 'Language model', 'llm_base']:
-        if col in plot_df.columns:
-            model_col = col
-            break
-    if model_col is None:
-        model_col = 'Language Model'  # Default
-    fig = go.Figure()
-    # Add Pareto frontier line (monotonically increasing best score over time)
-    # Also track which rows are on the frontier for labeling
-    frontier_rows = []
-    if len(plot_df) > 1:
-        # Compute Pareto frontier: only include points that set a new best score
-        frontier_dates = []
-        frontier_scores = []
-        max_score_so_far = float('-inf')
-        for _, row in plot_df.iterrows():
-            current_score = row[score_col]
-            current_date = row['release_date']
-            if current_score > max_score_so_far:
-                # This point is on the Pareto frontier
-                frontier_dates.append(current_date)
-                frontier_scores.append(current_score)
-                frontier_rows.append(row)
-                max_score_so_far = current_score
-        if frontier_dates:
-            fig.add_trace(go.Scatter(
-                x=frontier_dates,
-                y=frontier_scores,
-                mode='lines',
-                line=dict(color='#FFE165', width=2, dash='dash'),  # primary yellow, dashed
-                name='Pareto Frontier',
-                hoverinfo='skip',
-                showlegend=False
-            ))
-    # Calculate axis ranges
-    min_date = plot_df['release_date'].min()
-    max_date = plot_df['release_date'].max()
-    min_score = plot_df[score_col].min()
-    max_score = plot_df[score_col].max()
-    y_min = min_score - 5 if min_score > 5 else 0
-    y_max = max_score + 10  # Extra space for labels
-    # Build hover text for each point
-    hover_texts = []
-    for _, row in plot_df.iterrows():
-        model_name = row.get(model_col, 'Unknown')
-        openness = row.get('Openness', row.get('openness', 'unknown'))
-        h_pad = "   "
-        hover_text = f"<br>{h_pad}<b>{model_name}</b>{h_pad}<br>"
-        hover_text += f"{h_pad}Release: <b>{row['release_date'].strftime('%Y-%m-%d')}</b>{h_pad}<br>"
-        hover_text += f"{h_pad}Average Score: <b>{row[score_col]:.1f}</b>{h_pad}<br>"
-        hover_text += f"{h_pad}Openness: <b>{openness}</b>{h_pad}<br>"
-        hover_texts.append(hover_text)
-    plot_df['hover_text'] = hover_texts
-    # Add invisible markers for hover functionality
-    fig.add_trace(go.Scatter(
-        x=plot_df['release_date'],
-        y=plot_df[score_col],
-        mode='markers',
-        name='Models',
-        showlegend=False,
-        text=plot_df['hover_text'],
-        hoverinfo='text',
-        marker=dict(
-            color='rgba(0,0,0,0)',  # Invisible markers
-            size=25,  # Large enough for hover detection
-            opacity=0
-        )
-    ))
-    # Add marker icon images for each data point using data coordinates
-    layout_images = []
-    openness_col = 'Openness' if 'Openness' in plot_df.columns else 'openness'
-    for _, row in plot_df.iterrows():
-        model_name = row.get(model_col, '')
-        openness = row.get(openness_col, '')
-        marker_info = get_marker_icon(model_name, openness, mark_by)
-        logo_path = marker_info['path']
-        # Read the SVG file and encode as base64 data URI
-        if os.path.exists(logo_path):
-            try:
-                with open(logo_path, 'rb') as f:
-                    encoded_logo = base64.b64encode(f.read()).decode('utf-8')
-                    logo_uri = f"data:image/svg+xml;base64,{encoded_logo}"
-                    x_val = row['release_date']
-                    y_val = row[score_col]
-                    # Use data coordinates for precise alignment
-                    layout_images.append(dict(
-                        source=logo_uri,
-                        xref="x",
-                        yref="y",
-                        x=x_val,
-                        y=y_val,
-                        sizex=15 * 24 * 60 * 60 * 1000,  # ~15 days in milliseconds
-                        sizey=3,  # score units
-                        xanchor="center",
-                        yanchor="middle",
-                        layer="above"
-                    ))
-            except Exception:
-                pass
-    # Add model name labels only for frontier points
-    for row in frontier_rows:
-        model_name = row.get(model_col, '')
-        x_val = row['release_date']
-        y_val = row[score_col]
-        # Clean model name for label
-        if isinstance(model_name, list):
-            model_name = model_name[0] if model_name else ''
-        model_name = str(model_name).split('/')[-1]
-        if len(model_name) > 25:
-            model_name = model_name[:22] + '...'
-        fig.add_annotation(
-            x=x_val,
-            y=y_val,
-            xref="x",
-            yref="y",
-            text=model_name,
-            showarrow=False,
-            yshift=20,
-            font=STANDARD_FONT,
-            xanchor='center',
-            yanchor='bottom'
-        )
-    # Build layout configuration
-    layout_config = dict(
-        **STANDARD_LAYOUT,
         title="Model Performance Evolution Over Time",
-        xaxis=dict(
-            title="Model Release Date",
-            range=[min_date - pd.Timedelta(days=15), max_date + pd.Timedelta(days=15)]
-        ),
-        yaxis=dict(
-            title="Average Score",
-            range=[y_min, y_max]
-        ),
     )
-    # Add company logo images to the layout
-    if layout_images:
-        layout_config['images'] = layout_images
-    fig.update_layout(**layout_config)
-    # Add OpenHands branding
-    add_branding_to_figure(fig)
-    return fig
 def create_accuracy_by_size_chart(df: pd.DataFrame, mark_by: str = None) -> go.Figure:
     """
     Create a scatter plot showing accuracy vs parameter count for open-weights models.
-    Uses company logos as markers to match the Cost/Performance chart styling.
-    Includes a Pareto efficiency frontier line.
     Args:
-        df: DataFrame with columns including 'parameter_count_b' or 'Parameter_Count_B',
-            'average score', 'openness', 'Language Model'
-        mark_by: One of "Company", "Openness", or "Country" - controls which icon to display
     Returns:
-        Plotly figure showing accuracy vs model size (total parameters)
     """
-    import numpy as np
-    if mark_by is None:
-        mark_by = MARK_BY_DEFAULT
-    # Handle different column name formats for parameter count
-    param_col = None
-    for col in ['parameter_count_b', 'Parameter_Count_B', 'Parameter Count B']:
-        if col in df.columns:
-            param_col = col
-            break
     if df.empty or param_col is None:
         fig = go.Figure()
@@ -326,13 +101,13 @@ def create_accuracy_by_size_chart(df: pd.DataFrame, mark_by: str = None) -> go.F
             x=0.5, y=0.5, showarrow=False,
             font=STANDARD_FONT
         )
-        fig.update_layout(**STANDARD_LAYOUT)
         return fig
-    # Filter to only open-weights models with parameter data
-    open_aliases = [aliases.CANONICAL_OPENNESS_OPEN] + list(aliases.OPENNESS_ALIASES.get(aliases.CANONICAL_OPENNESS_OPEN, []))
-    # Get openness column
     openness_col = 'Openness' if 'Openness' in df.columns else 'openness'
     plot_df = df[
@@ -348,15 +123,11 @@ def create_accuracy_by_size_chart(df: pd.DataFrame, mark_by: str = None) -> go.F
             x=0.5, y=0.5, showarrow=False,
             font=STANDARD_FONT
         )
-        fig.update_layout(**STANDARD_LAYOUT)
         return fig
-    # Get the score column (handle different naming conventions)
-    score_col = None
-    for col in ['average score', 'Average Score', 'Average score']:
-        if col in plot_df.columns:
-            score_col = col
-            break
     if score_col is None:
         for col in plot_df.columns:
             if 'score' in col.lower() and 'average' in col.lower():
@@ -371,202 +142,18 @@ def create_accuracy_by_size_chart(df: pd.DataFrame, mark_by: str = None) -> go.F
             x=0.5, y=0.5, showarrow=False,
             font=STANDARD_FONT
         )
-        fig.update_layout(**STANDARD_LAYOUT)
         return fig
-    # Get model name column
-    model_col = None
-    for col in ['Language Model', 'Language model', 'llm_base']:
-        if col in plot_df.columns:
-            model_col = col
-            break
-    if model_col is None:
-        model_col = 'Language Model'  # Default
-    fig = go.Figure()
-    # Prepare data for plotting
-    data_points = []
-    for _, row in plot_df.iterrows():
-        total_params = row[param_col]
-        model_name = row.get(model_col, 'Unknown')
-        score = row[score_col]
-        openness = row.get(openness_col, '')
-        # Use total params for x-axis
-        x_val = total_params
-        # Create hover text matching existing chart style
-        h_pad = "   "
-        hover_text = f"<br>{h_pad}<b>{model_name}</b>{h_pad}<br>"
-        hover_text += f"{h_pad}Parameters: <b>{total_params:.0f}B</b>{h_pad}<br>"
-        hover_text += f"{h_pad}Average Score: <b>{score:.1f}</b>{h_pad}<br>"
-        data_points.append({
-            'x': x_val,
-            'y': score,
-            'model_name': model_name,
-            'hover_text': hover_text,
-            'total_params': total_params,
-            'openness': openness
-        })
-    x_values = [p['x'] for p in data_points]
-    y_values = [p['y'] for p in data_points]
-    # Calculate axis ranges for domain coordinate conversion
-    min_x = min(x_values)
-    max_x = max(x_values)
-    x_min_log = np.log10(min_x * 0.5) if min_x > 0 else 0
-    x_max_log = np.log10(max_x * 1.5) if max_x > 0 else 3
-    min_score = min(y_values)
-    max_score = max(y_values)
-    y_min = min_score - 5 if min_score > 5 else 0
-    y_max = max_score + 5
-    # Calculate and draw Pareto Efficiency Frontier
-    # For size vs accuracy, we want: smaller size (lower x) AND higher accuracy (higher y)
-    # Sort by x ascending, then track maximum y seen
-    sorted_data = sorted(data_points, key=lambda p: (p['x'], -p['y']))
-    frontier_points = []
-    frontier_rows = []
-    max_score_so_far = float('-inf')
-    for point in sorted_data:
-        if point['y'] >= max_score_so_far:
-            frontier_points.append({'x': point['x'], 'y': point['y']})
-            frontier_rows.append(point)
-            max_score_so_far = point['y']
-    if frontier_points:
-        frontier_df = pd.DataFrame(frontier_points)
-        fig.add_trace(go.Scatter(
-            x=frontier_df['x'],
-            y=frontier_df['y'],
-            mode='lines',
-            name='Efficiency Frontier',
-            showlegend=False,
-            line=dict(color='#FFE165', width=2, dash='dash'),  # primary yellow
-            hoverinfo='skip'
-        ))
-    # Add invisible markers for hover functionality
-    fig.add_trace(go.Scatter(
-        x=x_values,
-        y=y_values,
-        mode='markers',
-        name='Models',
-        showlegend=False,
-        text=[p['hover_text'] for p in data_points],
-        hoverinfo='text',
-        marker=dict(
-            color='rgba(0,0,0,0)',  # Invisible markers
-            size=25,  # Large enough for hover detection
-            opacity=0
-        )
-    ))
-    # Add marker icon images for each data point (uniform size like Cost/Performance chart)
-    layout_images = []
-    for point in data_points:
-        x_val = point['x']
-        y_val = point['y']
-        model_name = point['model_name']
-        openness = point['openness']
-        marker_info = get_marker_icon(model_name, openness, mark_by)
-        logo_path = marker_info['path']
-        # Read the SVG file and encode as base64 data URI
-        if os.path.exists(logo_path):
-            try:
-                with open(logo_path, 'rb') as f:
-                    encoded_logo = base64.b64encode(f.read()).decode('utf-8')
-                    logo_uri = f"data:image/svg+xml;base64,{encoded_logo}"
-                    # Convert to domain coordinates (0-1 range) for log scale x-axis
-                    if x_val > 0:
-                        log_x = np.log10(x_val)
-                        domain_x = (log_x - x_min_log) / (x_max_log - x_min_log)
-                    else:
-                        domain_x = 0
-                    domain_y = (y_val - y_min) / (y_max - y_min) if (y_max - y_min) > 0 else 0.5
-                    # Clamp to valid range
-                    domain_x = max(0, min(1, domain_x))
-                    domain_y = max(0, min(1, domain_y))
-                    # Uniform logo size (same as Cost/Performance chart)
-                    layout_images.append(dict(
-                        source=logo_uri,
-                        xref="x domain",
-                        yref="y domain",
-                        x=domain_x,
-                        y=domain_y,
-                        sizex=0.04,  # Size as fraction of plot width
-                        sizey=0.06,  # Size as fraction of plot height
-                        xanchor="center",
-                        yanchor="middle",
-                        layer="above"
-                    ))
-            except Exception:
-                pass
-    # Add model name labels for frontier points only (like Cost/Performance chart)
-    for point in frontier_rows:
-        x_val = point['x']
-        y_val = point['y']
-        model_name = point['model_name']
-        # Clean model name for label
-        if isinstance(model_name, list):
-            model_name = model_name[0] if model_name else ''
-        model_name = str(model_name).split('/')[-1]
-        if len(model_name) > 25:
-            model_name = model_name[:22] + '...'
-        # Transform x to log10 for annotation positioning on log scale
-        if x_val > 0:
-            x_log = np.log10(x_val)
-        else:
-            x_log = x_min_log
-        fig.add_annotation(
-            x=x_log,
-            y=y_val,
-            text=model_name,
-            showarrow=False,
-            yshift=25,
-            font=STANDARD_FONT,
-            xanchor='center',
-            yanchor='bottom'
-        )
-    # Build layout configuration
-    layout_config = dict(
-        **STANDARD_LAYOUT,
         title="Open Model Accuracy by Size",
-        xaxis=dict(
-            title="Parameters (Billions)",
-            type="log",
-            range=[x_min_log, x_max_log]
-        ),
-        yaxis=dict(
-            title="Average Score",
-            range=[y_min, y_max]
-        ),
     )
-    # Add company logo images to the layout
-    if layout_images:
-        layout_config['images'] = layout_images
-    fig.update_layout(**layout_config)
-    # Add OpenHands branding
-    add_branding_to_figure(fig)
-    return fig

 """
 Additional visualizations for the OpenHands Index leaderboard.
+These functions use the generic create_scatter_chart() from leaderboard_transformer
+as the single source of truth for scatter plot styling and behavior.
 """
 import pandas as pd
 import plotly.graph_objects as go
 import aliases
+# Import the generic scatter chart function - single source of truth
+from leaderboard_transformer import create_scatter_chart, STANDARD_LAYOUT, STANDARD_FONT
+def _find_column(df: pd.DataFrame, candidates: list, default: str = None) -> str:
+    """Find the first matching column name from candidates."""
+    for col in candidates:
+        if col in df.columns:
+            return col
+    return default
 def create_evolution_over_time_chart(df: pd.DataFrame, mark_by: str = None) -> go.Figure:
     """
     Create a chart showing model performance evolution over release dates.
     Args:
+        df: DataFrame with release_date and score columns
+        mark_by: One of "Company", "Openness", or "Country" for marker icons
     Returns:
         Plotly figure showing score evolution over time
     """
+    # Find the release date column
+    release_date_col = _find_column(df, ['release_date', 'Release_Date', 'Release Date'])
     if df.empty or release_date_col is None:
         fig = go.Figure()
             x=0.5, y=0.5, showarrow=False,
             font=STANDARD_FONT
         )
+        fig.update_layout(**STANDARD_LAYOUT, title="Model Performance Evolution Over Time")
         return fig
+    # Find score column
+    score_col = _find_column(df, ['Average Score', 'average score', 'Average score'])
     if score_col is None:
+        # Try to find any column with 'score' and 'average'
+        for col in df.columns:
             if 'score' in col.lower() and 'average' in col.lower():
                 score_col = col
                 break
             x=0.5, y=0.5, showarrow=False,
             font=STANDARD_FONT
         )
+        fig.update_layout(**STANDARD_LAYOUT, title="Model Performance Evolution Over Time")
         return fig
+    # Use the generic scatter chart
+    return create_scatter_chart(
+        df=df,
+        x_col=release_date_col,
+        y_col=score_col,
         title="Model Performance Evolution Over Time",
+        x_label="Model Release Date",
+        y_label="Average Score",
+        mark_by=mark_by,
+        x_type="date",
+        pareto_lower_is_better=False,  # Later dates with higher scores are better
     )
 def create_accuracy_by_size_chart(df: pd.DataFrame, mark_by: str = None) -> go.Figure:
     """
     Create a scatter plot showing accuracy vs parameter count for open-weights models.
     Args:
+        df: DataFrame with parameter_count and score columns
+        mark_by: One of "Company", "Openness", or "Country" for marker icons
     Returns:
+        Plotly figure showing accuracy vs model size
     """
+    # Find parameter count column
+    param_col = _find_column(df, ['parameter_count_b', 'Parameter_Count_B', 'Parameter Count B'])
     if df.empty or param_col is None:
         fig = go.Figure()
             x=0.5, y=0.5, showarrow=False,
             font=STANDARD_FONT
         )
+        fig.update_layout(**STANDARD_LAYOUT, title="Open Model Accuracy by Size")
         return fig
+    # Filter to only open-weights models
+    open_aliases = [aliases.CANONICAL_OPENNESS_OPEN] + list(
+        aliases.OPENNESS_ALIASES.get(aliases.CANONICAL_OPENNESS_OPEN, [])
+    )
     openness_col = 'Openness' if 'Openness' in df.columns else 'openness'
     plot_df = df[
             x=0.5, y=0.5, showarrow=False,
             font=STANDARD_FONT
         )
+        fig.update_layout(**STANDARD_LAYOUT, title="Open Model Accuracy by Size")
         return fig
+    # Find score column
+    score_col = _find_column(plot_df, ['Average Score', 'average score', 'Average score'])
     if score_col is None:
         for col in plot_df.columns:
             if 'score' in col.lower() and 'average' in col.lower():
             x=0.5, y=0.5, showarrow=False,
             font=STANDARD_FONT
         )
+        fig.update_layout(**STANDARD_LAYOUT, title="Open Model Accuracy by Size")
         return fig
+    # Use the generic scatter chart
+    return create_scatter_chart(
+        df=plot_df,
+        x_col=param_col,
+        y_col=score_col,
         title="Open Model Accuracy by Size",
+        x_label="Parameters (Billions)",
+        y_label="Average Score",
+        mark_by=mark_by,
+        x_type="log",
+        pareto_lower_is_better=True,  # Smaller models with higher scores are better
     )