File size: 5,488 Bytes
a4b9436
 
1f75b60
 
 
a4b9436
 
 
 
 
1f75b60
 
76b9525
 
1f75b60
 
 
 
 
 
76b9525
a4b9436
d731ad4
a4b9436
76b9525
a4b9436
 
1f75b60
 
a4b9436
 
 
 
1f75b60
 
71ba49b
 
a4b9436
 
 
 
 
76b9525
a4b9436
1f75b60
a4b9436
 
1f75b60
 
a4b9436
1f75b60
 
a4b9436
 
 
 
 
 
 
 
 
 
76b9525
a4b9436
1f75b60
a4b9436
 
1f75b60
 
 
 
 
76b9525
1f75b60
 
 
 
 
a4b9436
 
 
d731ad4
a4b9436
 
 
 
1f75b60
 
a4b9436
 
1f75b60
a4b9436
1f75b60
 
71ba49b
 
a4b9436
 
 
 
 
76b9525
a4b9436
1f75b60
a4b9436
 
1f75b60
 
 
 
71ba49b
 
a4b9436
71ba49b
 
a4b9436
 
 
 
 
 
 
 
76b9525
a4b9436
1f75b60
a4b9436
 
1f75b60
 
a4b9436
 
 
 
 
 
 
 
 
 
 
 
76b9525
a4b9436
1f75b60
a4b9436
 
1f75b60
 
 
 
 
76b9525
1f75b60
 
 
 
 
a4b9436
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
"""
Additional visualizations for the OpenHands Index leaderboard.

These functions use the generic create_scatter_chart() from leaderboard_transformer
as the single source of truth for scatter plot styling and behavior.
"""
import pandas as pd
import plotly.graph_objects as go
import aliases

# Import the generic scatter chart function - single source of truth
from leaderboard_transformer import create_scatter_chart, STANDARD_LAYOUT, STANDARD_FONT


def _find_column(df: pd.DataFrame, candidates: list, default: str = None) -> str:
    """Find the first matching column name from candidates."""
    for col in candidates:
        if col in df.columns:
            return col
    return default


def create_evolution_over_time_chart(df: pd.DataFrame, mark_by: str = None) -> go.Figure:
    """
    Plot average score against model release date.

    The actual plotting is delegated to create_scatter_chart(); this function
    only resolves which columns to use and falls back to an annotated,
    otherwise empty figure when the required data is missing.

    Args:
        df: DataFrame expected to hold a release-date column and an
            average-score column (several spellings are accepted).
        mark_by: Forwarded unchanged to create_scatter_chart(); documented
            upstream as one of "Company", "Openness", or "Country".

    Returns:
        Plotly figure: either the scatter chart or a placeholder carrying a
        "no data" message.
    """
    chart_title = "Model Performance Evolution Over Time"

    def _placeholder(message: str) -> go.Figure:
        # Blank figure with a centred message, styled like the real charts.
        empty_fig = go.Figure()
        empty_fig.add_annotation(
            text=message,
            xref="paper", yref="paper",
            x=0.5, y=0.5, showarrow=False,
            font=STANDARD_FONT,
        )
        empty_fig.update_layout(**STANDARD_LAYOUT, title=chart_title)
        return empty_fig

    # Resolve the release-date column; nothing can be plotted without it.
    date_column = _find_column(df, ['release_date', 'Release_Date', 'Release Date'])
    if df.empty or date_column is None:
        return _placeholder("No release date data available")

    # Resolve the score column: known spellings first, then the first column
    # whose name mentions both 'score' and 'average' (case-insensitive).
    score_column = _find_column(df, ['Average Score', 'average score', 'Average score'])
    if score_column is None:
        score_column = next(
            (
                name for name in df.columns
                if 'score' in name.lower() and 'average' in name.lower()
            ),
            None,
        )
    if score_column is None:
        return _placeholder("No score data available")

    # Hand off to the shared scatter implementation.
    return create_scatter_chart(
        df=df,
        x_col=date_column,
        y_col=score_column,
        title=chart_title,
        x_label="Model Release Date",
        y_label="Average Score",
        mark_by=mark_by,
        x_type="date",
        pareto_lower_is_better=False,  # Later dates with higher scores are better
    )


def create_accuracy_by_size_chart(df: pd.DataFrame, mark_by: str = None) -> go.Figure:
    """
    Create a scatter plot showing accuracy vs parameter count for open-weights models.

    Rows are kept only when they have a non-null parameter count and an
    openness value matching the canonical "open" label or one of its aliases
    (as defined in the ``aliases`` module). Plotting is delegated to
    create_scatter_chart() with a logarithmic x axis.

    Whenever required data is missing (empty frame, no parameter-count
    column, no openness column, no matching rows, or no score column) an
    otherwise empty figure carrying an explanatory annotation is returned
    instead of raising.

    Args:
        df: DataFrame expected to hold parameter-count, openness and
            average-score columns (several spellings are accepted).
        mark_by: Forwarded unchanged to create_scatter_chart(); one of
            "Company", "Openness", or "Country" for marker icons.

    Returns:
        Plotly figure showing accuracy vs model size, or a placeholder figure
        with a "no data" message.
    """
    chart_title = "Open Model Accuracy by Size"

    def _placeholder(message: str) -> go.Figure:
        # Blank figure with a centred message, styled like the real charts.
        fig = go.Figure()
        fig.add_annotation(
            text=message,
            xref="paper", yref="paper",
            x=0.5, y=0.5, showarrow=False,
            font=STANDARD_FONT,
        )
        fig.update_layout(**STANDARD_LAYOUT, title=chart_title)
        return fig

    # Find parameter count column
    param_col = _find_column(df, ['parameter_count_b', 'Parameter_Count_B', 'Parameter Count B'])
    if df.empty or param_col is None:
        return _placeholder("No parameter count data available")

    # Find the openness column. Previously a missing column fell through to
    # df['openness'] and raised KeyError; without it no row can be identified
    # as open-weights, so report that the same way as an empty filter result.
    openness_col = _find_column(df, ['Openness', 'openness'])
    if openness_col is None:
        return _placeholder("No open-weights models with parameter data available")

    # Filter to only open-weights models that have a parameter count
    open_aliases = [aliases.CANONICAL_OPENNESS_OPEN] + list(
        aliases.OPENNESS_ALIASES.get(aliases.CANONICAL_OPENNESS_OPEN, [])
    )
    plot_df = df[
        (df[param_col].notna()) &
        (df[openness_col].isin(open_aliases))
    ].copy()

    if plot_df.empty:
        return _placeholder("No open-weights models with parameter data available")

    # Find score column: known spellings first, then the first column whose
    # name mentions both 'score' and 'average' (case-insensitive).
    score_col = _find_column(plot_df, ['Average Score', 'average score', 'Average score'])
    if score_col is None:
        for col in plot_df.columns:
            if 'score' in col.lower() and 'average' in col.lower():
                score_col = col
                break

    if score_col is None:
        return _placeholder("No score data available")

    # Use the generic scatter chart
    return create_scatter_chart(
        df=plot_df,
        x_col=param_col,
        y_col=score_col,
        title=chart_title,
        x_label="Parameters (Billions)",
        y_label="Average Score",
        mark_by=mark_by,
        x_type="log",
        pareto_lower_is_better=True,  # Smaller models with higher scores are better
    )