Spaces:

OpenHands
/

openhands-index

Running

File size: 19,666 Bytes

"""
Additional visualizations for the OpenHands Index leaderboard.
"""
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
from datetime import datetime
import os
import base64
import aliases

# Import company logo mapping from ui_components
from ui_components import get_company_from_model, get_svg_as_data_uri

# Base layout shared by every chart in this module so they match the
# existing leaderboard figures. Hex values come from the OpenHands palette.
STANDARD_LAYOUT = {
    "template": "plotly_white",
    "height": 572,
    "font": {
        "family": "Outfit, ui-sans-serif, sans-serif",
        "color": "#0D0D0F",  # neutral-950
    },
    "hoverlabel": {
        "bgcolor": "#222328",  # neutral-800
        "font_size": 12,
        "font_family": "Outfit",
        "font_color": "#F7F8FB",  # neutral-50
    },
    "legend": {
        "bgcolor": "#F7F8FB",  # neutral-50
    },
    # Bottom margin leaves room for the branding logo and URL.
    "margin": {"b": 80},
}

# Font used for in-chart annotations (labels and placeholder messages).
STANDARD_FONT = {
    "size": 10,
    "color": "#0D0D0F",  # neutral-950
    "family": "Outfit",
}

# OpenHands branding: logo asset location and the site URL shown on charts.
OPENHANDS_LOGO_PATH = "assets/openhands_logo_color_forwhite.png"
OPENHANDS_URL = "https://index.openhands.dev"

# Annotation that prints the site URL in the bottom-right corner, positioned
# in paper coordinates just below the plotting area.
URL_ANNOTATION = {
    "text": OPENHANDS_URL,
    "xref": "paper",
    "yref": "paper",
    "x": 1,
    "y": -0.15,
    "xanchor": "right",
    "yanchor": "bottom",
    "showarrow": False,
    "font": {
        "family": "Outfit, ui-sans-serif, sans-serif",
        "size": 14,
        "color": "#82889B",  # neutral-400
    },
}


def get_openhands_logo_image(logo_path=None):
    """Get the OpenHands logo as a Plotly image dict for chart branding.

    Args:
        logo_path: Optional path to the PNG logo file. When omitted (the
            default), the module-level ``OPENHANDS_LOGO_PATH`` is used, which
            preserves the original zero-argument behavior.

    Returns:
        A dict suitable for a Plotly ``layout.images`` entry, anchored at the
        bottom-left in paper coordinates, with the file embedded as a base64
        ``data:image/png`` URI. Returns ``None`` when the file is missing or
        cannot be read, so callers can skip branding.
    """
    path = OPENHANDS_LOGO_PATH if logo_path is None else logo_path

    # EAFP: just try to read. A missing or unreadable file is an expected,
    # non-fatal condition; anything else (a real bug) is allowed to surface.
    try:
        with open(path, "rb") as f:
            logo_data = base64.b64encode(f.read()).decode('utf-8')
    except OSError:
        return None

    return dict(
        source=f"data:image/png;base64,{logo_data}",
        xref="paper",
        yref="paper",
        x=0,
        y=-0.15,
        sizex=0.15,
        sizey=0.15,
        xanchor="left",
        yanchor="bottom",
    )


def add_branding_to_figure(fig: go.Figure) -> go.Figure:
    """Append the OpenHands logo (when available) and site URL to a figure.

    Images and annotations already present on the figure's layout are kept;
    the branding elements are added after them.

    Args:
        fig: Figure to decorate. It is modified in place.

    Returns:
        The same figure object that was passed in.
    """
    # The logo is optional: the loader returns None when it has no image.
    branding_logo = get_openhands_logo_image()
    if branding_logo:
        fig.update_layout(images=[*(fig.layout.images or ()), branding_logo])

    # The URL annotation is always added.
    fig.update_layout(
        annotations=[*(fig.layout.annotations or ()), URL_ANNOTATION]
    )

    return fig


def create_evolution_over_time_chart(df: pd.DataFrame) -> go.Figure:
    """
    Create a chart showing model performance evolution over release dates.
    Uses company logos as markers to match the existing chart styling.

    Rows whose release date cannot be parsed, or whose score is missing or
    not numeric, are dropped. When nothing plottable remains, a placeholder
    figure carrying an explanatory message is returned instead of a chart.

    Args:
        df: DataFrame with a release date column ('release_date',
            'Release_Date' or 'Release Date') and an average score column
            ('average score' in any handled spelling, or any column whose
            name contains both 'average' and 'score'). Optional columns:
            'Language Model' / 'Language model' / 'llm_base' for labels and
            'Openness' / 'openness' for hover text. The input is not modified.

    Returns:
        Plotly figure showing score evolution over time
    """
    def _placeholder(message: str) -> go.Figure:
        """Return an otherwise empty, standard-styled figure showing *message*."""
        empty_fig = go.Figure()
        empty_fig.add_annotation(
            text=message,
            xref="paper", yref="paper",
            x=0.5, y=0.5, showarrow=False,
            font=STANDARD_FONT
        )
        empty_fig.update_layout(**STANDARD_LAYOUT)
        return empty_fig

    logo_uri_cache = {}

    def _logo_data_uri(path):
        """Return the SVG at *path* as a base64 data URI, or None if unreadable."""
        # Many models share a company logo; read each file only once.
        if path not in logo_uri_cache:
            uri = None
            if os.path.exists(path):
                try:
                    with open(path, 'rb') as f:
                        encoded_logo = base64.b64encode(f.read()).decode('utf-8')
                    uri = f"data:image/svg+xml;base64,{encoded_logo}"
                except OSError:
                    # Best effort: an unreadable logo just means no marker image.
                    uri = None
            logo_uri_cache[path] = uri
        return logo_uri_cache[path]

    # Handle different column name formats
    release_date_col = next(
        (col for col in ('release_date', 'Release_Date', 'Release Date') if col in df.columns),
        None,
    )
    if df.empty or release_date_col is None:
        return _placeholder("No release date data available")

    # Filter out rows without release dates, then normalize to a datetime
    # column named 'release_date'. Unparseable values become NaT and are
    # dropped, so emptiness must be checked AFTER the conversion as well.
    plot_df = df[df[release_date_col].notna() & (df[release_date_col] != '')].copy()
    plot_df['release_date'] = pd.to_datetime(plot_df[release_date_col], errors='coerce')
    plot_df = plot_df.dropna(subset=['release_date'])
    if plot_df.empty:
        return _placeholder("No release date data available")

    # Get the score column (handle different naming conventions)
    score_col = next(
        (col for col in ('average score', 'Average Score', 'Average score') if col in plot_df.columns),
        None,
    )
    if score_col is None:
        score_col = next(
            (col for col in plot_df.columns
             if 'score' in str(col).lower() and 'average' in str(col).lower()),
            None,
        )
    if score_col is None:
        return _placeholder("No score data available")

    # Scores must be real numbers: NaN/non-numeric values would otherwise
    # poison the axis range and break the ':.1f' hover formatting.
    plot_df[score_col] = pd.to_numeric(plot_df[score_col], errors='coerce')
    plot_df = plot_df.dropna(subset=[score_col])
    if plot_df.empty:
        return _placeholder("No score data available")

    # Sort by release date; ties are ordered best-score-first so the frontier
    # below is deterministic for models released on the same day.
    plot_df = plot_df.sort_values(['release_date', score_col], ascending=[True, False])

    # Get model name column
    model_col = next(
        (col for col in ('Language Model', 'Language model', 'llm_base') if col in plot_df.columns),
        'Language Model',  # Default
    )

    fig = go.Figure()

    # Add Pareto frontier line (monotonically increasing best score over time)
    if len(plot_df) > 1:
        # Only points that set a new best score belong to the frontier.
        frontier_dates = []
        frontier_scores = []
        max_score_so_far = float('-inf')
        for current_date, current_score in zip(plot_df['release_date'], plot_df[score_col]):
            if current_score > max_score_so_far:
                frontier_dates.append(current_date)
                frontier_scores.append(current_score)
                max_score_so_far = current_score

        if frontier_dates:
            fig.add_trace(go.Scatter(
                x=frontier_dates,
                y=frontier_scores,
                mode='lines',
                line=dict(color='#FFE165', width=2, dash='dash'),  # primary yellow, dashed
                name='Pareto Frontier',
                hoverinfo='skip',
                showlegend=False
            ))

    # Calculate axis ranges
    min_date = plot_df['release_date'].min()
    max_date = plot_df['release_date'].max()
    min_score = plot_df[score_col].min()
    max_score = plot_df[score_col].max()
    y_min = min_score - 5 if min_score > 5 else 0
    y_max = max_score + 10  # Extra space for labels

    # Single pass over the rows: build hover text, logo images and labels.
    h_pad = "   "
    hover_texts = []
    layout_images = []
    labels_data = []

    for _, row in plot_df.iterrows():
        x_val = row['release_date']
        y_val = row[score_col]

        hover_name = row.get(model_col, 'Unknown')
        openness = row.get('Openness', row.get('openness', 'unknown'))
        hover_text = f"<br>{h_pad}<b>{hover_name}</b>{h_pad}<br>"
        hover_text += f"{h_pad}Release: <b>{x_val.strftime('%Y-%m-%d')}</b>{h_pad}<br>"
        hover_text += f"{h_pad}Average Score: <b>{y_val:.1f}</b>{h_pad}<br>"
        hover_text += f"{h_pad}Openness: <b>{openness}</b>{h_pad}<br>"
        hover_texts.append(hover_text)

        model_name = row.get(model_col, '')
        company_info = get_company_from_model(model_name)
        logo_uri = _logo_data_uri(company_info['path'])
        if logo_uri is None:
            continue

        # Use data coordinates for precise alignment
        layout_images.append(dict(
            source=logo_uri,
            xref="x",
            yref="y",
            x=x_val,
            y=y_val,
            sizex=15 * 24 * 60 * 60 * 1000,  # ~15 days in milliseconds
            sizey=3,  # score units
            xanchor="center",
            yanchor="middle",
            layer="above"
        ))

        # A label is drawn only for points that also got a logo.
        labels_data.append({
            'x': x_val,
            'y': y_val,
            'label': model_name
        })

    plot_df['hover_text'] = hover_texts

    # Add invisible markers for hover functionality
    fig.add_trace(go.Scatter(
        x=plot_df['release_date'],
        y=plot_df[score_col],
        mode='markers',
        name='Models',
        showlegend=False,
        text=plot_df['hover_text'],
        hoverinfo='text',
        marker=dict(
            color='rgba(0,0,0,0)',  # Invisible markers
            size=25,  # Large enough for hover detection
            opacity=0
        )
    ))

    # Add model name labels above each point
    for item in labels_data:
        fig.add_annotation(
            x=item['x'],
            y=item['y'],
            xref="x",
            yref="y",
            text=item['label'],
            showarrow=False,
            yshift=20,
            font=STANDARD_FONT,
            xanchor='center',
            yanchor='bottom'
        )

    # Build layout configuration
    layout_config = dict(
        **STANDARD_LAYOUT,
        title="Model Performance Evolution Over Time",
        xaxis=dict(
            title="Model Release Date",
            range=[min_date - pd.Timedelta(days=15), max_date + pd.Timedelta(days=15)]
        ),
        yaxis=dict(
            title="Average Score",
            range=[y_min, y_max]
        ),
    )

    # Add company logo images to the layout
    if layout_images:
        layout_config['images'] = layout_images

    fig.update_layout(**layout_config)

    # Add OpenHands branding
    add_branding_to_figure(fig)

    return fig


def create_accuracy_by_size_chart(df: pd.DataFrame) -> go.Figure:
    """
    Create a scatter plot showing accuracy vs parameter count for open-weights models.
    Uses company logos as markers to match the existing chart styling.

    The x-axis is logarithmic and uses the active parameter count when one is
    present for a row, otherwise the total parameter count. Rows are dropped
    when the total parameter count or score is missing or not numeric, or
    when the x value is not positive (it cannot be placed on a log axis).
    When nothing plottable remains, or a required column is absent, a
    placeholder figure carrying an explanatory message is returned.

    Args:
        df: DataFrame with columns including 'parameter_count_b' or 'Parameter_Count_B',
            'active_parameter_count_b' or 'Active_Parameter_Count_B' (optional),
            'average score', 'openness' or 'Openness', 'Language Model'.
            The input is not modified.

    Returns:
        Plotly figure showing accuracy vs model size
    """
    import numpy as np

    def _placeholder(message: str) -> go.Figure:
        """Return an otherwise empty, standard-styled figure showing *message*."""
        empty_fig = go.Figure()
        empty_fig.add_annotation(
            text=message,
            xref="paper", yref="paper",
            x=0.5, y=0.5, showarrow=False,
            font=STANDARD_FONT
        )
        empty_fig.update_layout(**STANDARD_LAYOUT)
        return empty_fig

    logo_uri_cache = {}

    def _logo_data_uri(path):
        """Return the SVG at *path* as a base64 data URI, or None if unreadable."""
        # Many models share a company logo; read each file only once.
        if path not in logo_uri_cache:
            uri = None
            if os.path.exists(path):
                try:
                    with open(path, 'rb') as f:
                        encoded_logo = base64.b64encode(f.read()).decode('utf-8')
                    uri = f"data:image/svg+xml;base64,{encoded_logo}"
                except OSError:
                    # Best effort: an unreadable logo just means no marker image.
                    uri = None
            logo_uri_cache[path] = uri
        return logo_uri_cache[path]

    # Handle different column name formats for parameter count
    param_col = next(
        (col for col in ('parameter_count_b', 'Parameter_Count_B', 'Parameter Count B')
         if col in df.columns),
        None,
    )
    active_param_col = next(
        (col for col in ('active_parameter_count_b', 'Active_Parameter_Count_B', 'Active Parameter Count B')
         if col in df.columns),
        None,
    )

    if df.empty or param_col is None:
        return _placeholder("No parameter count data available")

    # Filter to only open-weights models with parameter data
    open_aliases = [aliases.CANONICAL_OPENNESS_OPEN] + list(aliases.OPENNESS_ALIASES.get(aliases.CANONICAL_OPENNESS_OPEN, []))

    # Without an openness column no row can be identified as open-weights;
    # show the placeholder rather than failing with a KeyError.
    openness_col = next((col for col in ('Openness', 'openness') if col in df.columns), None)
    if openness_col is None:
        return _placeholder("No open-weights models with parameter data available")

    plot_df = df[df[openness_col].isin(open_aliases)].copy()

    # Parameter counts must be numeric for the ':.0f' hover formatting and
    # the logo size scaling; unparseable values are treated as missing.
    plot_df[param_col] = pd.to_numeric(plot_df[param_col], errors='coerce')
    if active_param_col is not None:
        plot_df[active_param_col] = pd.to_numeric(plot_df[active_param_col], errors='coerce')
    plot_df = plot_df.dropna(subset=[param_col])

    if plot_df.empty:
        return _placeholder("No open-weights models with parameter data available")

    # Get the score column (handle different naming conventions)
    score_col = next(
        (col for col in ('average score', 'Average Score', 'Average score') if col in plot_df.columns),
        None,
    )
    if score_col is None:
        score_col = next(
            (col for col in plot_df.columns
             if 'score' in str(col).lower() and 'average' in str(col).lower()),
            None,
        )
    if score_col is None:
        return _placeholder("No score data available")

    # NaN scores make built-in min()/max() order-dependent and yield NaN axis
    # ranges, so rows without a usable score are removed up front.
    plot_df[score_col] = pd.to_numeric(plot_df[score_col], errors='coerce')
    plot_df = plot_df.dropna(subset=[score_col])
    if plot_df.empty:
        return _placeholder("No score data available")

    # Get model name column
    model_col = next(
        (col for col in ('Language Model', 'Language model', 'llm_base') if col in plot_df.columns),
        'Language Model',  # Default
    )

    fig = go.Figure()

    # Prepare data for plotting
    x_values = []
    y_values = []
    hover_texts = []
    model_names = []
    total_params_list = []
    h_pad = "   "

    for _, row in plot_df.iterrows():
        total_params = row[param_col]
        active_params = row.get(active_param_col) if active_param_col else None
        model_name = row.get(model_col, 'Unknown')
        score = row[score_col]

        # Use active params for x-axis if available (more meaningful for MoE)
        x_val = active_params if pd.notna(active_params) else total_params

        # Zero/negative sizes have no position on a log axis.
        if not x_val > 0:
            continue

        x_values.append(x_val)
        y_values.append(score)
        model_names.append(model_name)
        total_params_list.append(total_params)

        # Create hover text matching existing chart style
        hover_text = f"<br>{h_pad}<b>{model_name}</b>{h_pad}<br>"
        hover_text += f"{h_pad}Total Params: <b>{total_params:.0f}B</b>{h_pad}<br>"
        if pd.notna(active_params):
            hover_text += f"{h_pad}Active Params: <b>{active_params:.0f}B</b>{h_pad}<br>"
        hover_text += f"{h_pad}Average Score: <b>{score:.1f}</b>{h_pad}<br>"
        hover_texts.append(hover_text)

    if not x_values:
        return _placeholder("No open-weights models with parameter data available")

    # Calculate axis ranges for domain coordinate conversion. Every x value is
    # positive here, so the log10 bounds are finite and x_max_log > x_min_log.
    x_min_log = np.log10(min(x_values) * 0.5)
    x_max_log = np.log10(max(x_values) * 1.5)

    min_score = min(y_values)
    max_score = max(y_values)
    y_min = min_score - 5 if min_score > 5 else 0
    y_max = max_score + 10  # Extra space for labels

    # Add invisible markers for hover functionality
    fig.add_trace(go.Scatter(
        x=x_values,
        y=y_values,
        mode='markers',
        name='Models',
        showlegend=False,
        text=hover_texts,
        hoverinfo='text',
        marker=dict(
            color='rgba(0,0,0,0)',  # Invisible markers
            size=25,  # Large enough for hover detection
            opacity=0
        )
    ))

    # Add company logo images for each data point
    layout_images = []
    frontier_labels_data = []

    for x_val, y_val, model_name, total_params in zip(x_values, y_values, model_names, total_params_list):
        company_info = get_company_from_model(model_name)
        logo_uri = _logo_data_uri(company_info['path'])
        if logo_uri is None:
            continue

        # Convert to domain coordinates (0-1 range) for log scale x-axis
        domain_x = (np.log10(x_val) - x_min_log) / (x_max_log - x_min_log)
        domain_y = (y_val - y_min) / (y_max - y_min) if (y_max - y_min) > 0 else 0.5

        # Clamp to valid range
        domain_x = max(0.02, min(0.98, domain_x))
        domain_y = max(0.02, min(0.98, domain_y))

        # Scale logo size based on total params: larger models = larger
        # logos, capped so very large models do not dominate the chart.
        size_scale = min(0.03 + (total_params / 2000), 0.06)

        layout_images.append(dict(
            source=logo_uri,
            xref="x domain",
            yref="y domain",
            x=domain_x,
            y=domain_y,
            sizex=size_scale,
            sizey=size_scale * 1.5,
            xanchor="center",
            yanchor="middle",
            layer="above"
        ))

        # A label is drawn only for points that also got a logo.
        frontier_labels_data.append({
            'x': domain_x,
            'y': domain_y,
            'label': model_name
        })

    # Add model name labels above each point
    for item in frontier_labels_data:
        fig.add_annotation(
            x=item['x'],
            y=item['y'],
            xref="x domain",
            yref="y domain",
            text=item['label'],
            showarrow=False,
            yshift=25,
            font=STANDARD_FONT,
            xanchor='center',
            yanchor='bottom'
        )

    # Build layout configuration
    layout_config = dict(
        **STANDARD_LAYOUT,
        title="Open Model Accuracy by Size",
        xaxis=dict(
            title="Active Parameters (Billions)",
            type="log",
            range=[x_min_log, x_max_log]
        ),
        yaxis=dict(
            title="Average Score",
            range=[y_min, y_max]
        ),
    )

    # Add company logo images to the layout
    if layout_images:
        layout_config['images'] = layout_images

    fig.update_layout(**layout_config)

    # Add annotation explaining marker size
    fig.add_annotation(
        text="Logo size indicates total parameter count",
        xref="paper", yref="paper",
        x=0.02, y=-0.08,
        showarrow=False,
        font=STANDARD_FONT,
        align='left'
    )

    # Add OpenHands branding
    add_branding_to_figure(fig)

    return fig