Spaces:

OpenHands
/

openhands-index

Running

openhands openhands committed on Jan 26

Commit

76b9525

1 Parent(s): c14a283

Update visualization styling to match existing charts

- Use company logos as markers instead of colored dots
- Apply standard layout configuration (template, height, hoverlabel)
- Use domain coordinates for logo positioning
- Add model name labels above each point
- Match hover text formatting with existing charts

Co-authored-by: openhands <openhands@all-hands.dev>

Files changed (1) hide show

visualizations.py +293 -131

visualizations.py CHANGED Viewed

@@ -5,12 +5,40 @@ import pandas as pd
 import plotly.graph_objects as go
 import plotly.express as px
 from datetime import datetime
 import aliases
 def create_evolution_over_time_chart(df: pd.DataFrame) -> go.Figure:
     """
-    Create a line chart showing model performance evolution over release dates.
     Args:
         df: DataFrame with columns including 'release_date' or 'Release_Date', 'Language Model', 'average score', 'openness'
@@ -31,8 +59,9 @@ def create_evolution_over_time_chart(df: pd.DataFrame) -> go.Figure:
             text="No release date data available",
             xref="paper", yref="paper",
             x=0.5, y=0.5, showarrow=False,
-            font=dict(size=16)
         )
         return fig
     # Filter out rows without release dates
@@ -44,8 +73,9 @@ def create_evolution_over_time_chart(df: pd.DataFrame) -> go.Figure:
             text="No release date data available",
             xref="paper", yref="paper",
             x=0.5, y=0.5, showarrow=False,
-            font=dict(size=16)
         )
         return fig
     # Convert release_date to datetime (normalize column name)
@@ -73,8 +103,9 @@ def create_evolution_over_time_chart(df: pd.DataFrame) -> go.Figure:
             text="No score data available",
             xref="paper", yref="paper",
             x=0.5, y=0.5, showarrow=False,
-            font=dict(size=16)
         )
         return fig
     # Get model name column
@@ -86,46 +117,9 @@ def create_evolution_over_time_chart(df: pd.DataFrame) -> go.Figure:
     if model_col is None:
         model_col = 'Language Model'  # Default
-    # Map openness to colors
-    color_map = {
-        aliases.CANONICAL_OPENNESS_OPEN: "#F0529C",  # Pink for open
-        aliases.CANONICAL_OPENNESS_CLOSED: "#FFD700",  # Yellow/gold for closed
-    }
-    for canonical_openness, openness_aliases in aliases.OPENNESS_ALIASES.items():
-        for openness_alias in openness_aliases:
-            color_map[openness_alias] = color_map[canonical_openness]
     fig = go.Figure()
-    # Add scatter points for each model
-    for _, row in plot_df.iterrows():
-        openness = row.get('Openness', row.get('openness', 'unknown'))
-        color = color_map.get(openness, '#888888')
-        model_name = row.get(model_col, 'Unknown')
-        fig.add_trace(go.Scatter(
-            x=[row['release_date']],
-            y=[row[score_col]],
-            mode='markers+text',
-            marker=dict(
-                size=12,
-                color=color,
-                line=dict(width=1, color='#333333')
-            ),
-            text=[model_name],
-            textposition='top center',
-            textfont=dict(size=10),
-            name=model_name,
-            hovertemplate=(
-                f"<b>{model_name}</b><br>"
-                f"Release: %{{x|%Y-%m-%d}}<br>"
-                f"Score: %{{y:.1f}}<br>"
-                f"<extra></extra>"
-            ),
-            showlegend=False
-        ))
-    # Add trend line
     if len(plot_df) > 1:
         fig.add_trace(go.Scatter(
             x=plot_df['release_date'],
@@ -137,54 +131,133 @@ def create_evolution_over_time_chart(df: pd.DataFrame) -> go.Figure:
             showlegend=False
         ))
-    # Update layout
-    fig.update_layout(
-        title=dict(
-            text="Model Performance Evolution Over Time",
-            font=dict(size=18)
-        ),
         xaxis=dict(
             title="Model Release Date",
-            showgrid=True,
-            gridcolor='rgba(128,128,128,0.2)'
         ),
         yaxis=dict(
             title="Average Score",
-            showgrid=True,
-            gridcolor='rgba(128,128,128,0.2)'
         ),
-        plot_bgcolor='rgba(0,0,0,0)',
-        paper_bgcolor='rgba(0,0,0,0)',
-        hovermode='closest',
-        margin=dict(l=60, r=40, t=60, b=60),
-        height=400
     )
-    # Add legend for openness
-    fig.add_trace(go.Scatter(
-        x=[None], y=[None],
-        mode='markers',
-        marker=dict(size=10, color='#F0529C'),
-        name='Open Weights',
-        showlegend=True
-    ))
-    fig.add_trace(go.Scatter(
-        x=[None], y=[None],
-        mode='markers',
-        marker=dict(size=10, color='#FFD700'),
-        name='Closed',
-        showlegend=True
-    ))
-    fig.update_layout(
-        legend=dict(
-            orientation="h",
-            yanchor="bottom",
-            y=1.02,
-            xanchor="right",
-            x=1
-        )
-    )
     return fig
@@ -192,6 +265,7 @@ def create_evolution_over_time_chart(df: pd.DataFrame) -> go.Figure:
 def create_accuracy_by_size_chart(df: pd.DataFrame) -> go.Figure:
     """
     Create a scatter plot showing accuracy vs parameter count for open-weights models.
     Args:
         df: DataFrame with columns including 'parameter_count_b' or 'Parameter_Count_B',
@@ -201,6 +275,8 @@ def create_accuracy_by_size_chart(df: pd.DataFrame) -> go.Figure:
     Returns:
         Plotly figure showing accuracy vs model size
     """
     # Handle different column name formats for parameter count
     param_col = None
     for col in ['parameter_count_b', 'Parameter_Count_B', 'Parameter Count B']:
@@ -220,8 +296,9 @@ def create_accuracy_by_size_chart(df: pd.DataFrame) -> go.Figure:
             text="No parameter count data available",
             xref="paper", yref="paper",
             x=0.5, y=0.5, showarrow=False,
-            font=dict(size=16)
         )
         return fig
     # Filter to only open-weights models with parameter data
@@ -241,8 +318,9 @@ def create_accuracy_by_size_chart(df: pd.DataFrame) -> go.Figure:
             text="No open-weights models with parameter data available",
             xref="paper", yref="paper",
             x=0.5, y=0.5, showarrow=False,
-            font=dict(size=16)
         )
         return fig
     # Get the score column (handle different naming conventions)
@@ -263,8 +341,9 @@ def create_accuracy_by_size_chart(df: pd.DataFrame) -> go.Figure:
             text="No score data available",
             xref="paper", yref="paper",
             x=0.5, y=0.5, showarrow=False,
-            font=dict(size=16)
         )
         return fig
     # Get model name column
@@ -278,8 +357,13 @@ def create_accuracy_by_size_chart(df: pd.DataFrame) -> go.Figure:
     fig = go.Figure()
-    # Determine if we should use active params (for MoE models) or total params
-    # Use active params if available, otherwise total params
     for _, row in plot_df.iterrows():
         total_params = row[param_col]
         active_params = row.get(active_param_col) if active_param_col else None
@@ -289,66 +373,144 @@ def create_accuracy_by_size_chart(df: pd.DataFrame) -> go.Figure:
         # Use active params for x-axis if available (more meaningful for MoE)
         x_val = active_params if pd.notna(active_params) else total_params
-        # Create hover text
-        hover_text = f"<b>{model_name}</b><br>"
-        hover_text += f"Total Params: {total_params:.0f}B<br>"
-        if pd.notna(active_params):
-            hover_text += f"Active Params: {active_params:.0f}B<br>"
-        hover_text += f"Score: {score:.1f}<br>"
-        # Marker size based on total params (larger models = larger markers)
-        marker_size = 10 + (total_params / 100)  # Scale marker size
-        marker_size = min(marker_size, 30)  # Cap at 30
-        fig.add_trace(go.Scatter(
-            x=[x_val],
-            y=[score],
-            mode='markers+text',
-            marker=dict(
-                size=marker_size,
-                color='#F0529C',  # Pink for open models
-                line=dict(width=1, color='#333333'),
-                opacity=0.8
-            ),
-            text=[model_name],
-            textposition='top center',
-            textfont=dict(size=10),
-            name=model_name,
-            hovertemplate=hover_text + "<extra></extra>",
-            showlegend=False
-        ))
-    # Update layout
-    fig.update_layout(
-        title=dict(
-            text="Open Model Accuracy by Size",
-            font=dict(size=18)
-        ),
         xaxis=dict(
             title="Active Parameters (Billions)",
-            showgrid=True,
-            gridcolor='rgba(128,128,128,0.2)',
-            type='log'  # Log scale for better visualization
         ),
         yaxis=dict(
             title="Average Score",
-            showgrid=True,
-            gridcolor='rgba(128,128,128,0.2)'
         ),
-        plot_bgcolor='rgba(0,0,0,0)',
-        paper_bgcolor='rgba(0,0,0,0)',
-        hovermode='closest',
-        margin=dict(l=60, r=40, t=60, b=60),
-        height=400
     )
     # Add annotation explaining marker size
     fig.add_annotation(
-        text="Marker size indicates total parameter count",
         xref="paper", yref="paper",
-        x=0.02, y=-0.12,
         showarrow=False,
-        font=dict(size=10, color='gray'),
         align='left'
     )

 import plotly.graph_objects as go
 import plotly.express as px
 from datetime import datetime
+import os
+import base64
 import aliases
+# Import company logo mapping from ui_components
+from ui_components import get_company_from_model, get_svg_as_data_uri
+# Standard layout configuration matching existing charts
+STANDARD_LAYOUT = dict(
+    template="plotly_white",
+    height=572,
+    hoverlabel=dict(
+        bgcolor="#105257",
+        font_size=12,
+        font_family="Manrope",
+        font_color="#d3dedc",
+    ),
+    legend=dict(
+        bgcolor='#FAF2E9',
+    ),
+)
+# Standard font for annotations
+STANDARD_FONT = dict(
+    size=10,
+    color='#032629',
+    family='Manrope'
+)
 def create_evolution_over_time_chart(df: pd.DataFrame) -> go.Figure:
     """
+    Create a chart showing model performance evolution over release dates.
+    Uses company logos as markers to match the existing chart styling.
     Args:
         df: DataFrame with columns including 'release_date' or 'Release_Date', 'Language Model', 'average score', 'openness'
             text="No release date data available",
             xref="paper", yref="paper",
             x=0.5, y=0.5, showarrow=False,
+            font=STANDARD_FONT
         )
+        fig.update_layout(**STANDARD_LAYOUT)
         return fig
     # Filter out rows without release dates
             text="No release date data available",
             xref="paper", yref="paper",
             x=0.5, y=0.5, showarrow=False,
+            font=STANDARD_FONT
         )
+        fig.update_layout(**STANDARD_LAYOUT)
         return fig
     # Convert release_date to datetime (normalize column name)
             text="No score data available",
             xref="paper", yref="paper",
             x=0.5, y=0.5, showarrow=False,
+            font=STANDARD_FONT
         )
+        fig.update_layout(**STANDARD_LAYOUT)
         return fig
     # Get model name column
     if model_col is None:
         model_col = 'Language Model'  # Default
     fig = go.Figure()
+    # Add trend line first (so it's behind the markers)
     if len(plot_df) > 1:
         fig.add_trace(go.Scatter(
             x=plot_df['release_date'],
             showlegend=False
         ))
+    # Calculate axis ranges for domain coordinate conversion
+    min_date = plot_df['release_date'].min()
+    max_date = plot_df['release_date'].max()
+    date_range = (max_date - min_date).total_seconds() if max_date != min_date else 1
+    min_score = plot_df[score_col].min()
+    max_score = plot_df[score_col].max()
+    y_min = min_score - 5 if min_score > 5 else 0
+    y_max = max_score + 10  # Extra space for labels
+    # Build hover text for each point
+    hover_texts = []
+    for _, row in plot_df.iterrows():
+        model_name = row.get(model_col, 'Unknown')
+        openness = row.get('Openness', row.get('openness', 'unknown'))
+        h_pad = "   "
+        hover_text = f"<br>{h_pad}<b>{model_name}</b>{h_pad}<br>"
+        hover_text += f"{h_pad}Release: <b>{row['release_date'].strftime('%Y-%m-%d')}</b>{h_pad}<br>"
+        hover_text += f"{h_pad}Average Score: <b>{row[score_col]:.1f}</b>{h_pad}<br>"
+        hover_text += f"{h_pad}Openness: <b>{openness}</b>{h_pad}<br>"
+        hover_texts.append(hover_text)
+    plot_df['hover_text'] = hover_texts
+    # Add invisible markers for hover functionality
+    fig.add_trace(go.Scatter(
+        x=plot_df['release_date'],
+        y=plot_df[score_col],
+        mode='markers',
+        name='Models',
+        showlegend=False,
+        text=plot_df['hover_text'],
+        hoverinfo='text',
+        marker=dict(
+            color='rgba(0,0,0,0)',  # Invisible markers
+            size=25,  # Large enough for hover detection
+            opacity=0
+        )
+    ))
+    # Add company logo images for each data point
+    layout_images = []
+    frontier_labels_data = []
+    for _, row in plot_df.iterrows():
+        model_name = row.get(model_col, '')
+        company_info = get_company_from_model(model_name)
+        logo_path = company_info['path']
+        # Read the SVG file and encode as base64 data URI
+        if os.path.exists(logo_path):
+            try:
+                with open(logo_path, 'rb') as f:
+                    encoded_logo = base64.b64encode(f.read()).decode('utf-8')
+                    logo_uri = f"data:image/svg+xml;base64,{encoded_logo}"
+                    x_val = row['release_date']
+                    y_val = row[score_col]
+                    # Convert to domain coordinates (0-1 range)
+                    if date_range > 0:
+                        domain_x = (x_val - min_date).total_seconds() / date_range
+                    else:
+                        domain_x = 0.5
+                    domain_y = (y_val - y_min) / (y_max - y_min) if (y_max - y_min) > 0 else 0.5
+                    # Clamp to valid range
+                    domain_x = max(0.02, min(0.98, domain_x))
+                    domain_y = max(0.02, min(0.98, domain_y))
+                    layout_images.append(dict(
+                        source=logo_uri,
+                        xref="x domain",
+                        yref="y domain",
+                        x=domain_x,
+                        y=domain_y,
+                        sizex=0.04,
+                        sizey=0.06,
+                        xanchor="center",
+                        yanchor="middle",
+                        layer="above"
+                    ))
+                    # Store label data for annotation
+                    frontier_labels_data.append({
+                        'x': domain_x,
+                        'y': domain_y,
+                        'label': model_name
+                    })
+            except Exception:
+                pass
+    # Add model name labels above each point
+    for item in frontier_labels_data:
+        fig.add_annotation(
+            x=item['x'],
+            y=item['y'],
+            xref="x domain",
+            yref="y domain",
+            text=item['label'],
+            showarrow=False,
+            yshift=20,
+            font=STANDARD_FONT,
+            xanchor='center',
+            yanchor='bottom'
+        )
+    # Build layout configuration
+    layout_config = dict(
+        **STANDARD_LAYOUT,
+        title="Model Performance Evolution Over Time",
         xaxis=dict(
             title="Model Release Date",
+            range=[min_date - pd.Timedelta(days=15), max_date + pd.Timedelta(days=15)]
         ),
         yaxis=dict(
             title="Average Score",
+            range=[y_min, y_max]
         ),
     )
+    # Add company logo images to the layout
+    if layout_images:
+        layout_config['images'] = layout_images
+    fig.update_layout(**layout_config)
     return fig
 def create_accuracy_by_size_chart(df: pd.DataFrame) -> go.Figure:
     """
     Create a scatter plot showing accuracy vs parameter count for open-weights models.
+    Uses company logos as markers to match the existing chart styling.
     Args:
         df: DataFrame with columns including 'parameter_count_b' or 'Parameter_Count_B',
     Returns:
         Plotly figure showing accuracy vs model size
     """
+    import numpy as np
     # Handle different column name formats for parameter count
     param_col = None
     for col in ['parameter_count_b', 'Parameter_Count_B', 'Parameter Count B']:
             text="No parameter count data available",
             xref="paper", yref="paper",
             x=0.5, y=0.5, showarrow=False,
+            font=STANDARD_FONT
         )
+        fig.update_layout(**STANDARD_LAYOUT)
         return fig
     # Filter to only open-weights models with parameter data
             text="No open-weights models with parameter data available",
             xref="paper", yref="paper",
             x=0.5, y=0.5, showarrow=False,
+            font=STANDARD_FONT
         )
+        fig.update_layout(**STANDARD_LAYOUT)
         return fig
     # Get the score column (handle different naming conventions)
             text="No score data available",
             xref="paper", yref="paper",
             x=0.5, y=0.5, showarrow=False,
+            font=STANDARD_FONT
         )
+        fig.update_layout(**STANDARD_LAYOUT)
         return fig
     # Get model name column
     fig = go.Figure()
+    # Prepare data for plotting
+    x_values = []
+    y_values = []
+    hover_texts = []
+    model_names = []
+    total_params_list = []
     for _, row in plot_df.iterrows():
         total_params = row[param_col]
         active_params = row.get(active_param_col) if active_param_col else None
         # Use active params for x-axis if available (more meaningful for MoE)
         x_val = active_params if pd.notna(active_params) else total_params
+        x_values.append(x_val)
+        y_values.append(score)
+        model_names.append(model_name)
+        total_params_list.append(total_params)
+        # Create hover text matching existing chart style
+        h_pad = "   "
+        hover_text = f"<br>{h_pad}<b>{model_name}</b>{h_pad}<br>"
+        hover_text += f"{h_pad}Total Params: <b>{total_params:.0f}B</b>{h_pad}<br>"
+        if pd.notna(active_params):
+            hover_text += f"{h_pad}Active Params: <b>{active_params:.0f}B</b>{h_pad}<br>"
+        hover_text += f"{h_pad}Average Score: <b>{score:.1f}</b>{h_pad}<br>"
+        hover_texts.append(hover_text)
+    # Calculate axis ranges for domain coordinate conversion
+    min_x = min(x_values)
+    max_x = max(x_values)
+    x_min_log = np.log10(min_x * 0.5) if min_x > 0 else 0
+    x_max_log = np.log10(max_x * 1.5) if max_x > 0 else 3
+    min_score = min(y_values)
+    max_score = max(y_values)
+    y_min = min_score - 5 if min_score > 5 else 0
+    y_max = max_score + 10  # Extra space for labels
+    # Add invisible markers for hover functionality
+    fig.add_trace(go.Scatter(
+        x=x_values,
+        y=y_values,
+        mode='markers',
+        name='Models',
+        showlegend=False,
+        text=hover_texts,
+        hoverinfo='text',
+        marker=dict(
+            color='rgba(0,0,0,0)',  # Invisible markers
+            size=25,  # Large enough for hover detection
+            opacity=0
+        )
+    ))
+    # Add company logo images for each data point
+    layout_images = []
+    frontier_labels_data = []
+    for i, (x_val, y_val, model_name, total_params) in enumerate(zip(x_values, y_values, model_names, total_params_list)):
+        company_info = get_company_from_model(model_name)
+        logo_path = company_info['path']
+        # Read the SVG file and encode as base64 data URI
+        if os.path.exists(logo_path):
+            try:
+                with open(logo_path, 'rb') as f:
+                    encoded_logo = base64.b64encode(f.read()).decode('utf-8')
+                    logo_uri = f"data:image/svg+xml;base64,{encoded_logo}"
+                    # Convert to domain coordinates (0-1 range) for log scale x-axis
+                    if x_val > 0:
+                        log_x = np.log10(x_val)
+                        domain_x = (log_x - x_min_log) / (x_max_log - x_min_log)
+                    else:
+                        domain_x = 0
+                    domain_y = (y_val - y_min) / (y_max - y_min) if (y_max - y_min) > 0 else 0.5
+                    # Clamp to valid range
+                    domain_x = max(0.02, min(0.98, domain_x))
+                    domain_y = max(0.02, min(0.98, domain_y))
+                    # Scale logo size based on total params
+                    size_scale = 0.03 + (total_params / 2000)  # Larger models = larger logos
+                    size_scale = min(size_scale, 0.06)  # Cap size
+                    layout_images.append(dict(
+                        source=logo_uri,
+                        xref="x domain",
+                        yref="y domain",
+                        x=domain_x,
+                        y=domain_y,
+                        sizex=size_scale,
+                        sizey=size_scale * 1.5,
+                        xanchor="center",
+                        yanchor="middle",
+                        layer="above"
+                    ))
+                    # Store label data for annotation
+                    frontier_labels_data.append({
+                        'x': domain_x,
+                        'y': domain_y,
+                        'label': model_name
+                    })
+            except Exception:
+                pass
+    # Add model name labels above each point
+    for item in frontier_labels_data:
+        fig.add_annotation(
+            x=item['x'],
+            y=item['y'],
+            xref="x domain",
+            yref="y domain",
+            text=item['label'],
+            showarrow=False,
+            yshift=25,
+            font=STANDARD_FONT,
+            xanchor='center',
+            yanchor='bottom'
+        )
+    # Build layout configuration
+    layout_config = dict(
+        **STANDARD_LAYOUT,
+        title="Open Model Accuracy by Size",
         xaxis=dict(
             title="Active Parameters (Billions)",
+            type="log",
+            range=[x_min_log, x_max_log]
         ),
         yaxis=dict(
             title="Average Score",
+            range=[y_min, y_max]
         ),
     )
+    # Add company logo images to the layout
+    if layout_images:
+        layout_config['images'] = layout_images
+    fig.update_layout(**layout_config)
     # Add annotation explaining marker size
     fig.add_annotation(
+        text="Logo size indicates total parameter count",
         xref="paper", yref="paper",
+        x=0.02, y=-0.08,
         showarrow=False,
+        font=STANDARD_FONT,
         align='left'
     )