Spaces:

OpenHands
/

openhands-index

Running

openhands

Fix Evolution Over Time chart

3792b39 4 months ago

19.7 kB

	"""
	Additional visualizations for the OpenHands Index leaderboard.
	"""
	import pandas as pd
	import plotly.graph_objects as go
	import plotly.express as px
	from datetime import datetime
	import os
	import base64
	import aliases

	# Import company logo mapping from ui_components
	from ui_components import get_company_from_model, get_svg_as_data_uri

	# Standard layout configuration matching existing charts.
	# Colors are aligned with the OpenHands brand; each hex value is annotated
	# with the palette name it corresponds to.
	STANDARD_LAYOUT = {
	    "template": "plotly_white",
	    "height": 572,
	    "font": {
	        "family": "Outfit, ui-sans-serif, sans-serif",
	        "color": "#0D0D0F",  # neutral-950
	    },
	    "hoverlabel": {
	        "bgcolor": "#222328",  # neutral-800
	        "font_size": 12,
	        "font_family": "Outfit",
	        "font_color": "#F7F8FB",  # neutral-50
	    },
	    "legend": {
	        "bgcolor": "#F7F8FB",  # neutral-50
	    },
	    # Extra bottom margin leaves room for the logo and URL
	    "margin": {"b": 80},
	}

	# Font settings shared by every in-chart annotation (labels, messages).
	STANDARD_FONT = {
	    "size": 10,
	    "color": "#0D0D0F",  # neutral-950
	    "family": "Outfit",
	}

	# OpenHands branding constants: logo file (relative path) and public URL.
	OPENHANDS_LOGO_PATH = "assets/openhands_logo_color_forwhite.png"
	OPENHANDS_URL = "https://index.openhands.dev"

	# Annotation that prints the site URL at the bottom-right of a chart.
	# Coordinates are in "paper" space; the negative y places the text below
	# the plotting area.
	URL_ANNOTATION = {
	    "text": OPENHANDS_URL,
	    "xref": "paper",
	    "yref": "paper",
	    "x": 1,
	    "y": -0.15,
	    "xanchor": "right",
	    "yanchor": "bottom",
	    "showarrow": False,
	    "font": {
	        "family": "Outfit, ui-sans-serif, sans-serif",
	        "size": 14,
	        "color": "#82889B",  # neutral-400
	    },
	}


	def get_openhands_logo_image():
	    """Get the OpenHands logo as a Plotly image dict for chart branding.

	    The file at ``OPENHANDS_LOGO_PATH`` is read as bytes and embedded as a
	    base64 ``data:image/png`` URI. The returned dict positions the image in
	    paper coordinates at the bottom-left (x=0, y=-0.15).

	    Returns:
	        A dict of layout-image properties, or ``None`` when the logo file is
	        missing or cannot be read.
	    """
	    # Open directly rather than testing os.path.exists() first: a missing
	    # file surfaces as FileNotFoundError (an OSError), and there is no gap
	    # between the check and the read.
	    try:
	        with open(OPENHANDS_LOGO_PATH, "rb") as f:
	            logo_data = base64.b64encode(f.read()).decode('utf-8')
	    except OSError:
	        # Branding is best-effort: an unreadable logo must not break a chart.
	        # Only I/O failures are swallowed so programming errors still surface.
	        return None

	    return dict(
	        source=f"data:image/png;base64,{logo_data}",
	        xref="paper",
	        yref="paper",
	        x=0,
	        y=-0.15,
	        sizex=0.15,
	        sizey=0.15,
	        xanchor="left",
	        yanchor="bottom",
	    )


	def add_branding_to_figure(fig: go.Figure) -> go.Figure:
	    """Append the OpenHands logo (if available) and the site URL to ``fig``.

	    Images and annotations already present on the figure's layout are kept;
	    the branding elements are added after them.

	    Args:
	        fig: Figure to decorate. Its layout is updated in place.

	    Returns:
	        The same ``fig`` object that was passed in.
	    """
	    logo = get_openhands_logo_image()
	    if logo:
	        # ``fig.layout.images`` may be empty/falsy; treat that as "no images".
	        current_images = [*(fig.layout.images or ())]
	        fig.update_layout(images=current_images + [logo])

	    current_annotations = [*(fig.layout.annotations or ())]
	    fig.update_layout(annotations=current_annotations + [URL_ANNOTATION])

	    return fig


	def _evolution_pick_column(columns, candidates):
	    """Return the first name in ``candidates`` present in ``columns``, else ``None``."""
	    return next((name for name in candidates if name in columns), None)


	def _evolution_message_figure(message: str) -> go.Figure:
	    """Build an empty standard-layout figure showing ``message`` in its center."""
	    fig = go.Figure()
	    fig.add_annotation(
	        text=message,
	        xref="paper", yref="paper",
	        x=0.5, y=0.5, showarrow=False,
	        font=STANDARD_FONT
	    )
	    fig.update_layout(**STANDARD_LAYOUT)
	    return fig


	def _evolution_logo_uri(logo_path, cache):
	    """Return ``logo_path`` as a base64 SVG data URI, or ``None`` if unreadable.

	    Results (including failures) are memoized in ``cache`` so a logo shared by
	    several models is read from disk only once per chart.
	    """
	    if logo_path not in cache:
	        try:
	            with open(logo_path, 'rb') as f:
	                encoded_logo = base64.b64encode(f.read()).decode('utf-8')
	        except (OSError, ValueError):
	            # Missing/unreadable file or malformed path: skip this logo only.
	            cache[logo_path] = None
	        else:
	            cache[logo_path] = f"data:image/svg+xml;base64,{encoded_logo}"
	    return cache[logo_path]


	def create_evolution_over_time_chart(df: pd.DataFrame) -> go.Figure:
	    """
	    Create a chart showing model performance evolution over release dates.
	    Uses company logos as markers to match the existing chart styling.

	    Rows whose release date cannot be parsed, or whose score is missing or
	    non-numeric, are dropped before plotting. If nothing remains, a placeholder
	    figure carrying an explanatory message is returned instead.

	    Args:
	        df: DataFrame with columns including 'release_date' or 'Release_Date',
	            'Language Model', 'average score', 'openness'. ``df`` itself is not
	            modified; a filtered copy is used.

	    Returns:
	        Plotly figure showing score evolution over time
	    """
	    # Handle different column name formats
	    release_date_col = _evolution_pick_column(
	        df.columns, ['release_date', 'Release_Date', 'Release Date']
	    )
	    if df.empty or release_date_col is None:
	        return _evolution_message_figure("No release date data available")

	    # Filter out rows without release dates, then normalize to a datetime
	    # column named 'release_date'. The emptiness check runs AFTER coercion so
	    # that a frame containing only unparseable dates yields the placeholder
	    # rather than NaT axis ranges.
	    plot_df = df[df[release_date_col].notna() & (df[release_date_col] != '')].copy()
	    plot_df['release_date'] = pd.to_datetime(plot_df[release_date_col], errors='coerce')
	    plot_df = plot_df.dropna(subset=['release_date'])
	    if plot_df.empty:
	        return _evolution_message_figure("No release date data available")

	    # Get the score column (handle different naming conventions)
	    score_col = _evolution_pick_column(
	        plot_df.columns, ['average score', 'Average Score', 'Average score']
	    )
	    if score_col is None:
	        score_col = next(
	            (
	                col for col in plot_df.columns
	                if 'score' in str(col).lower() and 'average' in str(col).lower()
	            ),
	            None,
	        )
	    if score_col is None:
	        return _evolution_message_figure("No score data available")

	    # Missing or non-numeric scores cannot be placed on the y-axis and would
	    # turn the axis range into NaN, so drop those rows up front.
	    plot_df[score_col] = pd.to_numeric(plot_df[score_col], errors='coerce')
	    plot_df = plot_df.dropna(subset=[score_col])
	    if plot_df.empty:
	        return _evolution_message_figure("No score data available")

	    # Sort by release date
	    plot_df = plot_df.sort_values('release_date')

	    # Get model name column
	    model_col = _evolution_pick_column(
	        plot_df.columns, ['Language Model', 'Language model', 'llm_base']
	    )
	    if model_col is None:
	        model_col = 'Language Model'  # Default

	    fig = go.Figure()

	    # Add Pareto frontier line (monotonically increasing best score over time)
	    if len(plot_df) > 1:
	        # Only points that set a new best score belong to the frontier
	        frontier_dates = []
	        frontier_scores = []
	        max_score_so_far = float('-inf')
	        for current_date, current_score in zip(plot_df['release_date'], plot_df[score_col]):
	            if current_score > max_score_so_far:
	                frontier_dates.append(current_date)
	                frontier_scores.append(current_score)
	                max_score_so_far = current_score

	        if frontier_dates:
	            fig.add_trace(go.Scatter(
	                x=frontier_dates,
	                y=frontier_scores,
	                mode='lines',
	                line=dict(color='#FFE165', width=2, dash='dash'),  # primary yellow, dashed
	                name='Pareto Frontier',
	                hoverinfo='skip',
	                showlegend=False
	            ))

	    # Calculate axis ranges
	    min_date = plot_df['release_date'].min()
	    max_date = plot_df['release_date'].max()
	    min_score = plot_df[score_col].min()
	    max_score = plot_df[score_col].max()
	    y_min = min_score - 5 if min_score > 5 else 0
	    y_max = max_score + 10  # Extra space for labels

	    # One pass over the rows builds hover text, logo images and labels
	    h_pad = " "
	    hover_texts = []
	    layout_images = []
	    labels_data = []
	    logo_cache = {}

	    for _, row in plot_df.iterrows():
	        x_val = row['release_date']
	        y_val = row[score_col]
	        openness = row.get('Openness', row.get('openness', 'unknown'))

	        hover_name = row.get(model_col, 'Unknown')
	        hover_text = f"<br>{h_pad}<b>{hover_name}</b>{h_pad}<br>"
	        hover_text += f"{h_pad}Release: <b>{x_val.strftime('%Y-%m-%d')}</b>{h_pad}<br>"
	        hover_text += f"{h_pad}Average Score: <b>{y_val:.1f}</b>{h_pad}<br>"
	        hover_text += f"{h_pad}Openness: <b>{openness}</b>{h_pad}<br>"
	        hover_texts.append(hover_text)

	        model_name = row.get(model_col, '')
	        company_info = get_company_from_model(model_name)
	        logo_uri = _evolution_logo_uri(company_info['path'], logo_cache)
	        if logo_uri is None:
	            # NOTE(review): as before, a point whose logo cannot be loaded
	            # gets neither a logo nor a name label - confirm this is intended.
	            continue

	        # Use data coordinates for precise alignment
	        layout_images.append(dict(
	            source=logo_uri,
	            xref="x",
	            yref="y",
	            x=x_val,
	            y=y_val,
	            sizex=15 * 24 * 60 * 60 * 1000,  # ~15 days in milliseconds
	            sizey=3,  # score units
	            xanchor="center",
	            yanchor="middle",
	            layer="above"
	        ))
	        labels_data.append({'x': x_val, 'y': y_val, 'label': model_name})

	    # Add invisible markers for hover functionality
	    fig.add_trace(go.Scatter(
	        x=plot_df['release_date'],
	        y=plot_df[score_col],
	        mode='markers',
	        name='Models',
	        showlegend=False,
	        text=hover_texts,
	        hoverinfo='text',
	        marker=dict(
	            color='rgba(0,0,0,0)',  # Invisible markers
	            size=25,  # Large enough for hover detection
	            opacity=0
	        )
	    ))

	    # Add model name labels above each point
	    for item in labels_data:
	        fig.add_annotation(
	            x=item['x'],
	            y=item['y'],
	            xref="x",
	            yref="y",
	            text=item['label'],
	            showarrow=False,
	            yshift=20,
	            font=STANDARD_FONT,
	            xanchor='center',
	            yanchor='bottom'
	        )

	    # Build layout configuration
	    layout_config = dict(
	        **STANDARD_LAYOUT,
	        title="Model Performance Evolution Over Time",
	        xaxis=dict(
	            title="Model Release Date",
	            range=[min_date - pd.Timedelta(days=15), max_date + pd.Timedelta(days=15)]
	        ),
	        yaxis=dict(
	            title="Average Score",
	            range=[y_min, y_max]
	        ),
	    )

	    # Add company logo images to the layout
	    if layout_images:
	        layout_config['images'] = layout_images

	    fig.update_layout(**layout_config)

	    # Add OpenHands branding
	    add_branding_to_figure(fig)

	    return fig


	def _size_chart_pick_column(columns, candidates):
	    """Return the first name in ``candidates`` present in ``columns``, else ``None``."""
	    return next((name for name in candidates if name in columns), None)


	def _size_chart_message_figure(message: str) -> go.Figure:
	    """Build an empty standard-layout figure showing ``message`` in its center."""
	    fig = go.Figure()
	    fig.add_annotation(
	        text=message,
	        xref="paper", yref="paper",
	        x=0.5, y=0.5, showarrow=False,
	        font=STANDARD_FONT
	    )
	    fig.update_layout(**STANDARD_LAYOUT)
	    return fig


	def _size_chart_logo_uri(logo_path, cache):
	    """Return ``logo_path`` as a base64 SVG data URI, or ``None`` if unreadable.

	    Results (including failures) are memoized in ``cache`` so a logo shared by
	    several models is read from disk only once per chart.
	    """
	    if logo_path not in cache:
	        try:
	            with open(logo_path, 'rb') as f:
	                encoded_logo = base64.b64encode(f.read()).decode('utf-8')
	        except (OSError, ValueError):
	            # Missing/unreadable file or malformed path: skip this logo only.
	            cache[logo_path] = None
	        else:
	            cache[logo_path] = f"data:image/svg+xml;base64,{encoded_logo}"
	    return cache[logo_path]


	def create_accuracy_by_size_chart(df: pd.DataFrame) -> go.Figure:
	    """
	    Create a scatter plot showing accuracy vs parameter count for open-weights models.
	    Uses company logos as markers to match the existing chart styling.

	    The x-axis is logarithmic and uses the active parameter count when one is
	    present for a row, otherwise the total parameter count. Rows with a missing
	    or non-numeric parameter count or score are dropped. When no plottable row
	    remains, or a required column is absent, a placeholder figure carrying an
	    explanatory message is returned.

	    Args:
	        df: DataFrame with columns including 'parameter_count_b' or 'Parameter_Count_B',
	            'active_parameter_count_b' or 'Active_Parameter_Count_B',
	            'average score', 'openness', 'Language Model'. ``df`` itself is
	            not modified; a filtered copy is used.

	    Returns:
	        Plotly figure showing accuracy vs model size
	    """
	    import numpy as np

	    # Handle different column name formats for parameter count
	    param_col = _size_chart_pick_column(
	        df.columns, ['parameter_count_b', 'Parameter_Count_B', 'Parameter Count B']
	    )
	    active_param_col = _size_chart_pick_column(
	        df.columns,
	        ['active_parameter_count_b', 'Active_Parameter_Count_B', 'Active Parameter Count B'],
	    )

	    if df.empty or param_col is None:
	        return _size_chart_message_figure("No parameter count data available")

	    # Filter to only open-weights models with parameter data
	    open_aliases = [aliases.CANONICAL_OPENNESS_OPEN] + list(
	        aliases.OPENNESS_ALIASES.get(aliases.CANONICAL_OPENNESS_OPEN, [])
	    )

	    # Without an openness column no row can be identified as open-weights;
	    # return the placeholder instead of raising KeyError on the lookup.
	    openness_col = _size_chart_pick_column(df.columns, ['Openness', 'openness'])
	    if openness_col is None:
	        return _size_chart_message_figure(
	            "No open-weights models with parameter data available"
	        )

	    plot_df = df[
	        (df[param_col].notna()) &
	        (df[openness_col].isin(open_aliases))
	    ].copy()

	    # Coerce counts to numbers so the formatting and log math below cannot
	    # fail on strings; rows whose total count is unusable are dropped.
	    plot_df[param_col] = pd.to_numeric(plot_df[param_col], errors='coerce')
	    if active_param_col:
	        plot_df[active_param_col] = pd.to_numeric(plot_df[active_param_col], errors='coerce')
	    plot_df = plot_df.dropna(subset=[param_col])

	    if plot_df.empty:
	        return _size_chart_message_figure(
	            "No open-weights models with parameter data available"
	        )

	    # Get the score column (handle different naming conventions)
	    score_col = _size_chart_pick_column(
	        plot_df.columns, ['average score', 'Average Score', 'Average score']
	    )
	    if score_col is None:
	        score_col = next(
	            (
	                col for col in plot_df.columns
	                if 'score' in str(col).lower() and 'average' in str(col).lower()
	            ),
	            None,
	        )
	    if score_col is None:
	        return _size_chart_message_figure("No score data available")

	    # min()/max() over a list containing NaN is order-dependent, so rows
	    # without a usable score are removed before computing the y-range.
	    plot_df[score_col] = pd.to_numeric(plot_df[score_col], errors='coerce')
	    plot_df = plot_df.dropna(subset=[score_col])
	    if plot_df.empty:
	        return _size_chart_message_figure("No score data available")

	    # Get model name column
	    model_col = _size_chart_pick_column(
	        plot_df.columns, ['Language Model', 'Language model', 'llm_base']
	    )
	    if model_col is None:
	        model_col = 'Language Model'  # Default

	    fig = go.Figure()

	    # Prepare data for plotting
	    x_values = []
	    y_values = []
	    hover_texts = []
	    model_names = []
	    total_params_list = []
	    h_pad = " "

	    for _, row in plot_df.iterrows():
	        total_params = row[param_col]
	        active_params = row[active_param_col] if active_param_col else None
	        model_name = row.get(model_col, 'Unknown')
	        score = row[score_col]
	        has_active = pd.notna(active_params)

	        # Use active params for x-axis if available (more meaningful for MoE)
	        x_values.append(active_params if has_active else total_params)
	        y_values.append(score)
	        model_names.append(model_name)
	        total_params_list.append(total_params)

	        # Create hover text matching existing chart style
	        hover_text = f"<br>{h_pad}<b>{model_name}</b>{h_pad}<br>"
	        hover_text += f"{h_pad}Total Params: <b>{total_params:.0f}B</b>{h_pad}<br>"
	        if has_active:
	            hover_text += f"{h_pad}Active Params: <b>{active_params:.0f}B</b>{h_pad}<br>"
	        hover_text += f"{h_pad}Average Score: <b>{score:.1f}</b>{h_pad}<br>"
	        hover_texts.append(hover_text)

	    # Axis ranges for domain-coordinate conversion. The log bounds are taken
	    # from positive values only: a stray zero/negative count must not pin the
	    # axis start at 10**0 (clipping sub-1B models), and the span between the
	    # two bounds is then always strictly positive.
	    positive_x = [x for x in x_values if x > 0]
	    x_min_log = np.log10(min(positive_x) * 0.5) if positive_x else 0
	    x_max_log = np.log10(max(positive_x) * 1.5) if positive_x else 3
	    x_span_log = x_max_log - x_min_log

	    min_score = min(y_values)
	    max_score = max(y_values)
	    y_min = min_score - 5 if min_score > 5 else 0
	    y_max = max_score + 10  # Extra space for labels
	    y_span = y_max - y_min

	    # Add invisible markers for hover functionality
	    fig.add_trace(go.Scatter(
	        x=x_values,
	        y=y_values,
	        mode='markers',
	        name='Models',
	        showlegend=False,
	        text=hover_texts,
	        hoverinfo='text',
	        marker=dict(
	            color='rgba(0,0,0,0)',  # Invisible markers
	            size=25,  # Large enough for hover detection
	            opacity=0
	        )
	    ))

	    # Add company logo images for each data point
	    layout_images = []
	    labels_data = []
	    logo_cache = {}

	    for x_val, y_val, model_name, total_params in zip(
	        x_values, y_values, model_names, total_params_list
	    ):
	        company_info = get_company_from_model(model_name)
	        logo_uri = _size_chart_logo_uri(company_info['path'], logo_cache)
	        if logo_uri is None:
	            # NOTE(review): as before, a point whose logo cannot be loaded
	            # gets neither a logo nor a name label - confirm this is intended.
	            continue

	        # Convert to domain coordinates (0-1 range) for log scale x-axis
	        domain_x = (np.log10(x_val) - x_min_log) / x_span_log if x_val > 0 else 0
	        domain_y = (y_val - y_min) / y_span if y_span > 0 else 0.5

	        # Clamp to valid range
	        domain_x = max(0.02, min(0.98, domain_x))
	        domain_y = max(0.02, min(0.98, domain_y))

	        # Scale logo size based on total params (larger models = larger logos),
	        # capped so very large models do not dominate the plot
	        size_scale = min(0.03 + (total_params / 2000), 0.06)

	        layout_images.append(dict(
	            source=logo_uri,
	            xref="x domain",
	            yref="y domain",
	            x=domain_x,
	            y=domain_y,
	            sizex=size_scale,
	            sizey=size_scale * 1.5,
	            xanchor="center",
	            yanchor="middle",
	            layer="above"
	        ))
	        labels_data.append({'x': domain_x, 'y': domain_y, 'label': model_name})

	    # Add model name labels above each point
	    for item in labels_data:
	        fig.add_annotation(
	            x=item['x'],
	            y=item['y'],
	            xref="x domain",
	            yref="y domain",
	            text=item['label'],
	            showarrow=False,
	            yshift=25,
	            font=STANDARD_FONT,
	            xanchor='center',
	            yanchor='bottom'
	        )

	    # Build layout configuration
	    layout_config = dict(
	        **STANDARD_LAYOUT,
	        title="Open Model Accuracy by Size",
	        xaxis=dict(
	            title="Active Parameters (Billions)",
	            type="log",
	            range=[x_min_log, x_max_log]
	        ),
	        yaxis=dict(
	            title="Average Score",
	            range=[y_min, y_max]
	        ),
	    )

	    # Add company logo images to the layout
	    if layout_images:
	        layout_config['images'] = layout_images

	    fig.update_layout(**layout_config)

	    # Add annotation explaining marker size
	    fig.add_annotation(
	        text="Logo size indicates total parameter count",
	        xref="paper", yref="paper",
	        x=0.02, y=-0.08,
	        showarrow=False,
	        font=STANDARD_FONT,
	        align='left'
	    )

	    # Add OpenHands branding
	    add_branding_to_figure(fig)

	    return fig