Spaces:
Running
Running
| """ | |
| Additional visualizations for the OpenHands Index leaderboard. | |
| """ | |
| import pandas as pd | |
| import plotly.graph_objects as go | |
| import plotly.express as px | |
| from datetime import datetime | |
| import os | |
| import base64 | |
| import aliases | |
| # Import company logo mapping from ui_components | |
| from ui_components import get_company_from_model, get_svg_as_data_uri | |
# Shared Plotly layout applied to every chart in this module so the new
# visualizations match the existing ones. Colors follow the OpenHands palette.
STANDARD_LAYOUT = {
    "template": "plotly_white",
    "height": 572,
    "font": {
        "family": "Outfit, ui-sans-serif, sans-serif",
        "color": "#0D0D0F",  # neutral-950
    },
    "hoverlabel": {
        "bgcolor": "#222328",  # neutral-800
        "font_size": 12,
        "font_family": "Outfit",
        "font_color": "#F7F8FB",  # neutral-50
    },
    "legend": {
        "bgcolor": "#F7F8FB",  # neutral-50
    },
    # Extra bottom margin leaves room for the logo and URL under the plot.
    "margin": {"b": 80},
}

# Font used for in-chart annotations (model labels, placeholder messages).
STANDARD_FONT = {
    "size": 10,
    "color": "#0D0D0F",  # neutral-950
    "family": "Outfit",
}

# OpenHands branding constants.
OPENHANDS_LOGO_PATH = "assets/openhands_logo_color_forwhite.png"
OPENHANDS_URL = "https://index.openhands.dev"

# Annotation that prints the site URL at the bottom-right corner of a chart.
# Paper coordinates with a negative y place it below the plotting area.
URL_ANNOTATION = {
    "text": OPENHANDS_URL,
    "xref": "paper",
    "yref": "paper",
    "x": 1,
    "y": -0.15,
    "xanchor": "right",
    "yanchor": "bottom",
    "showarrow": False,
    "font": {
        "family": "Outfit, ui-sans-serif, sans-serif",
        "size": 14,
        "color": "#82889B",  # neutral-400
    },
}
def get_openhands_logo_image(logo_path: "str | None" = None) -> "dict | None":
    """Get the OpenHands logo as a Plotly layout-image dict for chart branding.

    Args:
        logo_path: Path of the PNG file to embed. When omitted (the default),
            ``OPENHANDS_LOGO_PATH`` is used.

    Returns:
        A dict suitable for ``layout.images`` that anchors the logo at the
        bottom-left corner below the plot (paper coordinates), or ``None``
        when the file cannot be read.
    """
    path = OPENHANDS_LOGO_PATH if logo_path is None else logo_path

    # Try the read directly instead of checking for existence first: this
    # covers missing files, directories and permission problems in one place
    # and cannot race with the file disappearing between check and open.
    try:
        with open(path, "rb") as f:
            logo_data = base64.b64encode(f.read()).decode('utf-8')
    except OSError:
        # Branding is best-effort; a chart without the logo is still valid.
        return None

    return dict(
        source=f"data:image/png;base64,{logo_data}",
        xref="paper",
        yref="paper",
        x=0,
        y=-0.15,
        sizex=0.15,
        sizey=0.15,
        xanchor="left",
        yanchor="bottom",
    )
def add_branding_to_figure(fig: go.Figure) -> go.Figure:
    """Append the OpenHands logo and site URL to a Plotly figure.

    Images and annotations already present on the figure are kept; the
    branding elements are added after them. The figure is modified in place
    and also returned.

    Args:
        fig: Figure to brand.

    Returns:
        The same figure object that was passed in.
    """
    # The logo is optional: it is skipped when the helper returns nothing.
    logo = get_openhands_logo_image()
    if logo:
        fig.update_layout(images=[*(fig.layout.images or ()), logo])

    # The URL annotation is always added.
    fig.update_layout(
        annotations=[*(fig.layout.annotations or ()), URL_ANNOTATION]
    )
    return fig
def create_evolution_over_time_chart(df: pd.DataFrame) -> go.Figure:
    """
    Create a chart showing model performance evolution over release dates.

    Company logos are drawn at each data point, with an invisible scatter
    trace underneath that provides the hover tooltips. A dashed line connects
    the points that set a new best score as release dates advance.

    Rows whose release date cannot be parsed or whose score is not numeric
    are left out of the chart.

    Args:
        df: DataFrame with a release date column ('release_date',
            'Release_Date' or 'Release Date'), an average score column
            ('average score' in any of the supported capitalizations), a
            model name column ('Language Model', 'Language model' or
            'llm_base') and optionally 'Openness' / 'openness'.

    Returns:
        Plotly figure showing score evolution over time, or a placeholder
        figure carrying an explanatory message when the required data is
        missing.
    """

    def _message_figure(message):
        """Return an otherwise empty figure with ``message`` centered on it."""
        placeholder = go.Figure()
        placeholder.add_annotation(
            text=message,
            xref="paper", yref="paper",
            x=0.5, y=0.5, showarrow=False,
            font=STANDARD_FONT,
        )
        placeholder.update_layout(**STANDARD_LAYOUT)
        return placeholder

    def _first_present(candidates, columns):
        """Return the first candidate name found in ``columns``, else None."""
        return next((name for name in candidates if name in columns), None)

    # Many models share a company logo; read each file only once.
    logo_uri_cache = {}

    def _logo_data_uri(path):
        """Return the SVG at ``path`` as a base64 data URI, or None if unreadable."""
        if path not in logo_uri_cache:
            try:
                with open(path, 'rb') as f:
                    encoded_logo = base64.b64encode(f.read()).decode('utf-8')
            except OSError:
                # A missing/unreadable logo only costs that point its marker.
                logo_uri_cache[path] = None
            else:
                logo_uri_cache[path] = f"data:image/svg+xml;base64,{encoded_logo}"
        return logo_uri_cache[path]

    # Handle different column name formats
    release_date_col = _first_present(
        ['release_date', 'Release_Date', 'Release Date'], df.columns
    )
    if df.empty or release_date_col is None:
        return _message_figure("No release date data available")

    # Filter out rows without release dates, then normalize to a datetime
    # column named 'release_date'. Values that fail to parse become NaT and
    # are dropped as well.
    has_date = df[release_date_col].notna() & (df[release_date_col] != '')
    plot_df = df[has_date].copy()
    plot_df['release_date'] = pd.to_datetime(plot_df[release_date_col], errors='coerce')
    plot_df = plot_df.dropna(subset=['release_date'])
    if plot_df.empty:
        # Also covers the case where every date was unparseable, which would
        # otherwise produce NaT/NaN axis ranges further down.
        return _message_figure("No release date data available")

    # Get the score column (handle different naming conventions)
    score_col = _first_present(
        ['average score', 'Average Score', 'Average score'], plot_df.columns
    )
    if score_col is None:
        score_col = next(
            (
                col for col in plot_df.columns
                if 'score' in str(col).lower() and 'average' in str(col).lower()
            ),
            None,
        )
    if score_col is None:
        return _message_figure("No score data available")

    # Scores feed numeric formatting and the axis range math, so keep only
    # rows with a usable number.
    plot_df[score_col] = pd.to_numeric(plot_df[score_col], errors='coerce')
    plot_df = plot_df.dropna(subset=[score_col])
    if plot_df.empty:
        return _message_figure("No score data available")

    # Sort by release date
    plot_df = plot_df.sort_values('release_date')

    # Get model name column
    model_col = _first_present(
        ['Language Model', 'Language model', 'llm_base'], plot_df.columns
    )
    if model_col is None:
        model_col = 'Language Model'  # Default

    fig = go.Figure()

    # Add frontier line: walking in date order, keep only the points that set
    # a new best score, so the line never decreases.
    if len(plot_df) > 1:
        frontier_dates = []
        frontier_scores = []
        max_score_so_far = float('-inf')
        for current_date, current_score in zip(plot_df['release_date'], plot_df[score_col]):
            if current_score > max_score_so_far:
                frontier_dates.append(current_date)
                frontier_scores.append(current_score)
                max_score_so_far = current_score
        if frontier_dates:
            fig.add_trace(go.Scatter(
                x=frontier_dates,
                y=frontier_scores,
                mode='lines',
                line=dict(color='#FFE165', width=2, dash='dash'),  # primary yellow, dashed
                name='Pareto Frontier',
                hoverinfo='skip',
                showlegend=False,
            ))

    # Calculate axis ranges
    min_date = plot_df['release_date'].min()
    max_date = plot_df['release_date'].max()
    min_score = plot_df[score_col].min()
    max_score = plot_df[score_col].max()
    y_min = min_score - 5 if min_score > 5 else 0
    y_max = max_score + 10  # Extra space for labels

    # Build hover text for each point
    h_pad = " "
    hover_texts = []
    for _, row in plot_df.iterrows():
        model_name = row.get(model_col, 'Unknown')
        openness = row.get('Openness', row.get('openness', 'unknown'))
        hover_text = f"<br>{h_pad}<b>{model_name}</b>{h_pad}<br>"
        hover_text += f"{h_pad}Release: <b>{row['release_date'].strftime('%Y-%m-%d')}</b>{h_pad}<br>"
        hover_text += f"{h_pad}Average Score: <b>{row[score_col]:.1f}</b>{h_pad}<br>"
        hover_text += f"{h_pad}Openness: <b>{openness}</b>{h_pad}<br>"
        hover_texts.append(hover_text)
    plot_df['hover_text'] = hover_texts

    # Add invisible markers for hover functionality
    fig.add_trace(go.Scatter(
        x=plot_df['release_date'],
        y=plot_df[score_col],
        mode='markers',
        name='Models',
        showlegend=False,
        text=plot_df['hover_text'],
        hoverinfo='text',
        marker=dict(
            color='rgba(0,0,0,0)',  # Invisible markers
            size=25,  # Large enough for hover detection
            opacity=0,
        ),
    ))

    # Add company logo images for each data point using data coordinates.
    # A point only gets its name label when its logo could be loaded.
    layout_images = []
    labels_data = []
    for _, row in plot_df.iterrows():
        model_name = row.get(model_col, '')
        company_info = get_company_from_model(model_name)
        logo_uri = _logo_data_uri(company_info['path'])
        if logo_uri is None:
            continue

        x_val = row['release_date']
        y_val = row[score_col]
        # Use data coordinates for precise alignment
        layout_images.append(dict(
            source=logo_uri,
            xref="x",
            yref="y",
            x=x_val,
            y=y_val,
            sizex=15 * 24 * 60 * 60 * 1000,  # ~15 days in milliseconds
            sizey=3,  # score units
            xanchor="center",
            yanchor="middle",
            layer="above",
        ))
        # Store label data for annotation
        labels_data.append({'x': x_val, 'y': y_val, 'label': model_name})

    # Add model name labels above each point
    for item in labels_data:
        fig.add_annotation(
            x=item['x'],
            y=item['y'],
            xref="x",
            yref="y",
            text=item['label'],
            showarrow=False,
            yshift=20,
            font=STANDARD_FONT,
            xanchor='center',
            yanchor='bottom',
        )

    # Build layout configuration
    date_padding = pd.Timedelta(days=15)
    layout_config = dict(
        **STANDARD_LAYOUT,
        title="Model Performance Evolution Over Time",
        xaxis=dict(
            title="Model Release Date",
            range=[min_date - date_padding, max_date + date_padding],
        ),
        yaxis=dict(
            title="Average Score",
            range=[y_min, y_max],
        ),
    )
    # Add company logo images to the layout
    if layout_images:
        layout_config['images'] = layout_images
    fig.update_layout(**layout_config)

    # Add OpenHands branding
    add_branding_to_figure(fig)
    return fig
def create_accuracy_by_size_chart(df: pd.DataFrame) -> go.Figure:
    """
    Create a scatter plot showing accuracy vs parameter count for open-weights models.

    Company logos are drawn at each data point, with an invisible scatter
    trace underneath that provides the hover tooltips. The x-axis is
    logarithmic and uses the active parameter count when a row has one,
    falling back to the total parameter count otherwise. Logo size grows with
    the total parameter count, up to a cap.

    Rows without a parameter count, without a numeric score, or whose
    openness value is not one of the "open" aliases are left out.

    Args:
        df: DataFrame with columns including 'parameter_count_b' or
            'Parameter_Count_B', 'active_parameter_count_b' or
            'Active_Parameter_Count_B', 'average score', 'openness',
            'Language Model'

    Returns:
        Plotly figure showing accuracy vs model size, or a placeholder figure
        carrying an explanatory message when the required data is missing.
    """
    import numpy as np

    def _message_figure(message):
        """Return an otherwise empty figure with ``message`` centered on it."""
        placeholder = go.Figure()
        placeholder.add_annotation(
            text=message,
            xref="paper", yref="paper",
            x=0.5, y=0.5, showarrow=False,
            font=STANDARD_FONT,
        )
        placeholder.update_layout(**STANDARD_LAYOUT)
        return placeholder

    def _first_present(candidates, columns):
        """Return the first candidate name found in ``columns``, else None."""
        return next((name for name in candidates if name in columns), None)

    # Many models share a company logo; read each file only once.
    logo_uri_cache = {}

    def _logo_data_uri(path):
        """Return the SVG at ``path`` as a base64 data URI, or None if unreadable."""
        if path not in logo_uri_cache:
            try:
                with open(path, 'rb') as f:
                    encoded_logo = base64.b64encode(f.read()).decode('utf-8')
            except OSError:
                # A missing/unreadable logo only costs that point its marker.
                logo_uri_cache[path] = None
            else:
                logo_uri_cache[path] = f"data:image/svg+xml;base64,{encoded_logo}"
        return logo_uri_cache[path]

    # Handle different column name formats for parameter count
    param_col = _first_present(
        ['parameter_count_b', 'Parameter_Count_B', 'Parameter Count B'], df.columns
    )
    active_param_col = _first_present(
        ['active_parameter_count_b', 'Active_Parameter_Count_B', 'Active Parameter Count B'],
        df.columns,
    )
    if df.empty or param_col is None:
        return _message_figure("No parameter count data available")

    # Filter to only open-weights models with parameter data
    open_aliases = [aliases.CANONICAL_OPENNESS_OPEN] + list(
        aliases.OPENNESS_ALIASES.get(aliases.CANONICAL_OPENNESS_OPEN, [])
    )
    no_open_models_message = "No open-weights models with parameter data available"

    # Without an openness column no row can be identified as open-weights, so
    # report that instead of failing with a KeyError on the column lookup.
    openness_col = _first_present(['Openness', 'openness'], df.columns)
    if openness_col is None:
        return _message_figure(no_open_models_message)

    plot_df = df[
        (df[param_col].notna()) &
        (df[openness_col].isin(open_aliases))
    ].copy()
    if plot_df.empty:
        return _message_figure(no_open_models_message)

    # Get the score column (handle different naming conventions)
    score_col = _first_present(
        ['average score', 'Average Score', 'Average score'], plot_df.columns
    )
    if score_col is None:
        score_col = next(
            (
                col for col in plot_df.columns
                if 'score' in str(col).lower() and 'average' in str(col).lower()
            ),
            None,
        )
    if score_col is None:
        return _message_figure("No score data available")

    # Keep only rows with a numeric score: a NaN score would make the
    # min()/max() range computation order-dependent and would pin that
    # model's logo to the top edge of the plot after clamping.
    plot_df[score_col] = pd.to_numeric(plot_df[score_col], errors='coerce')
    plot_df = plot_df.dropna(subset=[score_col])
    if plot_df.empty:
        return _message_figure("No score data available")

    # Get model name column
    model_col = _first_present(
        ['Language Model', 'Language model', 'llm_base'], plot_df.columns
    )
    if model_col is None:
        model_col = 'Language Model'  # Default

    fig = go.Figure()

    # Prepare data for plotting
    x_values = []
    y_values = []
    hover_texts = []
    model_names = []
    total_params_list = []
    h_pad = " "
    for _, row in plot_df.iterrows():
        total_params = row[param_col]
        active_params = row.get(active_param_col) if active_param_col else None
        model_name = row.get(model_col, 'Unknown')
        score = row[score_col]

        # Use active params for x-axis if available (more meaningful for MoE)
        x_values.append(active_params if pd.notna(active_params) else total_params)
        y_values.append(score)
        model_names.append(model_name)
        total_params_list.append(total_params)

        # Create hover text matching existing chart style
        hover_text = f"<br>{h_pad}<b>{model_name}</b>{h_pad}<br>"
        hover_text += f"{h_pad}Total Params: <b>{total_params:.0f}B</b>{h_pad}<br>"
        if pd.notna(active_params):
            hover_text += f"{h_pad}Active Params: <b>{active_params:.0f}B</b>{h_pad}<br>"
        hover_text += f"{h_pad}Average Score: <b>{score:.1f}</b>{h_pad}<br>"
        hover_texts.append(hover_text)

    # Calculate axis ranges for domain coordinate conversion. The x range is
    # expressed in log10 units because the axis type is "log".
    min_x = min(x_values)
    max_x = max(x_values)
    x_min_log = np.log10(min_x * 0.5) if min_x > 0 else 0
    x_max_log = np.log10(max_x * 1.5) if max_x > 0 else 3
    x_log_span = x_max_log - x_min_log
    min_score = min(y_values)
    max_score = max(y_values)
    y_min = min_score - 5 if min_score > 5 else 0
    y_max = max_score + 10  # Extra space for labels
    y_span = y_max - y_min

    # Add invisible markers for hover functionality
    fig.add_trace(go.Scatter(
        x=x_values,
        y=y_values,
        mode='markers',
        name='Models',
        showlegend=False,
        text=hover_texts,
        hoverinfo='text',
        marker=dict(
            color='rgba(0,0,0,0)',  # Invisible markers
            size=25,  # Large enough for hover detection
            opacity=0,
        ),
    ))

    # Add company logo images for each data point.
    # A point only gets its name label when its logo could be loaded.
    layout_images = []
    labels_data = []
    for x_val, y_val, model_name, total_params in zip(
        x_values, y_values, model_names, total_params_list
    ):
        company_info = get_company_from_model(model_name)
        logo_uri = _logo_data_uri(company_info['path'])
        if logo_uri is None:
            continue

        # Convert to domain coordinates (0-1 range) for log scale x-axis.
        # Non-positive values cannot be placed on a log axis and are pinned
        # to the left edge; a zero-width range is guarded against so the
        # conversion never divides by zero.
        if x_val > 0 and x_log_span != 0:
            domain_x = (np.log10(x_val) - x_min_log) / x_log_span
        else:
            domain_x = 0
        domain_y = (y_val - y_min) / y_span if y_span > 0 else 0.5

        # Clamp to valid range
        domain_x = max(0.02, min(0.98, domain_x))
        domain_y = max(0.02, min(0.98, domain_y))

        # Scale logo size based on total params: larger models = larger
        # logos, capped so very large models do not dominate the chart.
        size_scale = min(0.03 + (total_params / 2000), 0.06)

        layout_images.append(dict(
            source=logo_uri,
            xref="x domain",
            yref="y domain",
            x=domain_x,
            y=domain_y,
            sizex=size_scale,
            sizey=size_scale * 1.5,
            xanchor="center",
            yanchor="middle",
            layer="above",
        ))
        # Store label data for annotation
        labels_data.append({'x': domain_x, 'y': domain_y, 'label': model_name})

    # Add model name labels above each point
    for item in labels_data:
        fig.add_annotation(
            x=item['x'],
            y=item['y'],
            xref="x domain",
            yref="y domain",
            text=item['label'],
            showarrow=False,
            yshift=25,
            font=STANDARD_FONT,
            xanchor='center',
            yanchor='bottom',
        )

    # Build layout configuration
    layout_config = dict(
        **STANDARD_LAYOUT,
        title="Open Model Accuracy by Size",
        xaxis=dict(
            title="Active Parameters (Billions)",
            type="log",
            range=[x_min_log, x_max_log],
        ),
        yaxis=dict(
            title="Average Score",
            range=[y_min, y_max],
        ),
    )
    # Add company logo images to the layout
    if layout_images:
        layout_config['images'] = layout_images
    fig.update_layout(**layout_config)

    # Add annotation explaining marker size
    fig.add_annotation(
        text="Logo size indicates total parameter count",
        xref="paper", yref="paper",
        x=0.02, y=-0.08,
        showarrow=False,
        font=STANDARD_FONT,
        align='left',
    )

    # Add OpenHands branding
    add_branding_to_figure(fig)
    return fig