Spaces:

HeshamHaroon
/

Arabic-Function-Calling-Leaderboard

Running

App Files Files Community

Arabic-Function-Calling-Leaderboard / afcl /visualization /charts.py

HeshamHaroon

Initial release: Arabic Function Calling Leaderboard

566d03e verified 18 days ago

raw

history blame contribute delete

9.72 kB

	"""
	Visualization Charts
	====================

	Plotly-based visualizations for the Arabic Function Calling Leaderboard.
	"""

	import plotly.graph_objects as go
	import plotly.express as px
	from typing import Dict, List, Optional
	import pandas as pd


	# Maps each internal category key to the Arabic label shown when a chart is
	# rendered with use_arabic=True; keys without an entry are displayed as-is
	# by the chart builders (they look labels up with a fallback to the key).
	CATEGORY_NAMES_AR = {
	    "simple": "بسيط",
	    "multiple": "متعدد",
	    "parallel": "متوازي",
	    "parallel_multiple": "متوازي متعدد",
	    "irrelevance": "اللا صلة",
	    "dialect_handling": "اللهجات",
	    "multi_turn": "متعدد الأدوار",
	    "native_arabic": "العربي الأصلي",
	    "java": "جافا",
	    "javascript": "جافاسكريبت",
	    "rest": "REST",
	    "sql": "SQL",
	}

	# Hex colors assigned to per-model traces. The chart builders pick
	# MODEL_COLORS[i % len(MODEL_COLORS)], so the list repeats after ten models.
	MODEL_COLORS = [
	    "#1f77b4",
	    "#ff7f0e",
	    "#2ca02c",
	    "#d62728",
	    "#9467bd",
	    "#8c564b",
	    "#e377c2",
	    "#7f7f7f",
	    "#bcbd22",
	    "#17becf",
	]


	def create_radar_chart(
	    model_scores: Dict[str, Dict[str, float]],
	    categories: Optional[List[str]] = None,
	    use_arabic: bool = True,
	    title: str = "Model Comparison"
	) -> go.Figure:
	    """
	    Create a radar/spider chart comparing models across categories.

	    One filled ``Scatterpolar`` trace is added per model, colored from
	    MODEL_COLORS (cycling when there are more models than colors). Each
	    trace repeats its first point at the end so the outline is closed.

	    Args:
	        model_scores: Dict mapping model names to category scores. A category
	            missing from a model's scores is plotted as 0.
	        categories: Categories to include, in axis order. Defaults to simple,
	            multiple, parallel, parallel_multiple, irrelevance and
	            dialect_handling. Tuples and other iterables are accepted; an
	            empty collection produces traces without points instead of
	            raising.
	        use_arabic: Whether to label axes with the Arabic names from
	            CATEGORY_NAMES_AR (falling back to the raw key) and to request
	            the "Noto Kufi Arabic" font.
	        title: Chart title

	    Returns:
	        Plotly Figure object with the radial axis fixed to the range 0-100.
	    """
	    if categories is None:
	        categories = ["simple", "multiple", "parallel", "parallel_multiple",
	                      "irrelevance", "dialect_handling"]
	    else:
	        # Materialize once: the categories are iterated for every model and
	        # concatenated below, which a tuple or a one-shot iterator cannot do.
	        categories = list(categories)

	    # Prepare category labels
	    if use_arabic:
	        labels = [CATEGORY_NAMES_AR.get(cat, cat) for cat in categories]
	    else:
	        labels = categories

	    # Close the radar outline by repeating the first label. Slicing with
	    # [:1] (instead of indexing [0]) keeps an empty category list from
	    # raising IndexError. The labels are identical for every model, so they
	    # are built once outside the loop.
	    labels_closed = labels + labels[:1]

	    fig = go.Figure()

	    for i, (model_name, scores) in enumerate(model_scores.items()):
	        values = [scores.get(cat, 0) for cat in categories]

	        fig.add_trace(go.Scatterpolar(
	            r=values + values[:1],  # repeat first value to close the shape
	            theta=labels_closed,
	            fill='toself',
	            name=model_name,
	            line_color=MODEL_COLORS[i % len(MODEL_COLORS)],
	            opacity=0.7
	        ))

	    fig.update_layout(
	        polar=dict(
	            radialaxis=dict(
	                visible=True,
	                range=[0, 100]
	            )
	        ),
	        showlegend=True,
	        title=dict(
	            text=title,
	            font=dict(size=16)
	        ),
	        font=dict(
	            family="Noto Kufi Arabic, Arial" if use_arabic else "Arial"
	        )
	    )

	    return fig


	def create_bar_chart(
	    leaderboard_data: List[Dict],
	    metric: str = "overall",
	    top_n: int = 10,
	    use_arabic: bool = True,
	    title: str = "Top Models"
	) -> go.Figure:
	    """
	    Create a horizontal bar chart of top models.

	    Entries are ranked by ``metric`` in descending order and the first
	    ``top_n`` are drawn. Each bar is labelled with its score formatted to
	    one decimal place followed by "%", and colored by score band:
	    green (>= 80), blue (>= 60), orange (>= 40), red (below 40).

	    Args:
	        leaderboard_data: List of model entries with scores. The display name
	            is read from the 'model' key, then 'name', else 'Unknown'.
	        metric: Metric to display (default: 'overall'). An entry whose metric
	            is missing or None is treated as scoring 0.
	        top_n: Number of top models to show
	        use_arabic: Whether to use an Arabic x-axis title and request the
	            "Noto Kufi Arabic" font.
	        title: Chart title

	    Returns:
	        Plotly Figure object. Its height is 40 px per bar with a minimum of
	        400 px; the x axis spans 0-105.
	    """
	    def _score_of(entry: Dict) -> float:
	        # A stored None would make both the sort comparison and the ":.1f"
	        # label formatting raise TypeError, so it is handled like an absent
	        # metric.
	        value = entry.get(metric, 0)
	        return 0 if value is None else value

	    # Sort and get top N
	    ranked = sorted(leaderboard_data, key=_score_of, reverse=True)[:top_n]

	    # Reverse for horizontal bar chart (top at top)
	    ranked = ranked[::-1]

	    models = [d.get('model', d.get('name', 'Unknown')) for d in ranked]
	    scores = [_score_of(d) for d in ranked]

	    # (lower bound, color) pairs checked from the highest band down; a score
	    # below every bound falls through to red.
	    color_bands = (
	        (80, '#2ca02c'),  # Green
	        (60, '#1f77b4'),  # Blue
	        (40, '#ff7f0e'),  # Orange
	    )
	    colors = [
	        next((color for floor, color in color_bands if score >= floor), '#d62728')
	        for score in scores
	    ]

	    fig = go.Figure(go.Bar(
	        x=scores,
	        y=models,
	        orientation='h',
	        marker_color=colors,
	        text=[f"{s:.1f}%" for s in scores],
	        textposition='outside'
	    ))

	    fig.update_layout(
	        title=dict(
	            text=title,
	            font=dict(size=16)
	        ),
	        xaxis=dict(
	            title="الدقة (%)" if use_arabic else "Accuracy (%)",
	            # Upper bound is 105 rather than 100 - presumably headroom for the
	            # labels drawn outside the bars.
	            range=[0, 105]
	        ),
	        yaxis=dict(
	            title=""
	        ),
	        font=dict(
	            family="Noto Kufi Arabic, Arial" if use_arabic else "Arial"
	        ),
	        height=max(400, len(models) * 40)
	    )

	    return fig


	def create_category_comparison(
	    leaderboard_data: List[Dict],
	    models: Optional[List[str]] = None,
	    use_arabic: bool = True,
	    title: str = "Category Performance Comparison"
	) -> go.Figure:
	    """
	    Build a grouped bar chart: one group per category, one bar per model.

	    The categories shown are fixed: simple, multiple, parallel,
	    parallel_multiple, irrelevance and dialect_handling. A category missing
	    from an entry is plotted as 0.

	    Args:
	        leaderboard_data: List of model entries with category scores. An
	            entry's name is its 'model' key, then 'name', else 'Unknown'.
	        models: Names of the models to plot, in legend order. When None, the
	            five entries with the highest 'overall' value are used. Names
	            with no matching entry are skipped.
	        use_arabic: Whether to use Arabic category labels and axis titles,
	            request the "Noto Kufi Arabic" font and tilt the x tick labels
	            by -45 degrees.
	        title: Chart title

	    Returns:
	        Plotly Figure object, 500 px high, with a horizontal legend above
	        the plot area.
	    """
	    def _name_of(entry):
	        # Display name of a leaderboard entry.
	        return entry.get('model', entry.get('name', 'Unknown'))

	    categories = ["simple", "multiple", "parallel", "parallel_multiple",
	                  "irrelevance", "dialect_handling"]

	    # Default selection: the five best entries by overall score.
	    if models is None:
	        ranked = sorted(
	            leaderboard_data,
	            key=lambda x: x.get('overall', 0),
	            reverse=True
	        )
	        models = [_name_of(entry) for entry in ranked[:5]]

	    # Index the selected entries by name; a later entry with the same name
	    # replaces an earlier one.
	    model_data = {}
	    for entry in leaderboard_data:
	        entry_name = _name_of(entry)
	        if entry_name in models:
	            model_data[entry_name] = entry

	    cat_labels = (
	        [CATEGORY_NAMES_AR.get(cat, cat) for cat in categories]
	        if use_arabic
	        else categories
	    )

	    fig = go.Figure()

	    for position, model in enumerate(models):
	        if model not in model_data:
	            continue
	        entry = model_data[model]
	        fig.add_trace(go.Bar(
	            name=model,
	            x=cat_labels,
	            y=[entry.get(cat, 0) for cat in categories],
	            # The color follows the position in `models`, so skipped names
	            # still consume a palette slot.
	            marker_color=MODEL_COLORS[position % len(MODEL_COLORS)]
	        ))

	    # Language-dependent layout pieces.
	    if use_arabic:
	        x_title = "الفئة"
	        y_title = "الدقة (%)"
	        tick_angle = -45
	        font_family = "Noto Kufi Arabic, Arial"
	    else:
	        x_title = "Category"
	        y_title = "Accuracy (%)"
	        tick_angle = 0
	        font_family = "Arial"

	    fig.update_layout(
	        barmode='group',
	        title=dict(text=title, font=dict(size=16)),
	        xaxis=dict(title=x_title, tickangle=tick_angle),
	        yaxis=dict(title=y_title, range=[0, 105]),
	        font=dict(family=font_family),
	        legend=dict(
	            orientation="h",
	            yanchor="bottom",
	            y=1.02,
	            xanchor="right",
	            x=1
	        ),
	        height=500
	    )

	    return fig


	def create_dialect_breakdown(
	    model_scores: Dict[str, Dict[str, float]],
	    use_arabic: bool = True,
	    title: str = "Dialect Performance"
	) -> go.Figure:
	    """
	    Build a grouped bar chart of per-dialect scores, one bar per model.

	    Four dialect keys are plotted, in this order: msa, egyptian, gulf,
	    levantine. A dialect missing from a model's scores is plotted as 0.

	    Args:
	        model_scores: Dict mapping model names to dialect scores
	        use_arabic: Whether to use Arabic dialect labels and axis titles and
	            request the "Noto Kufi Arabic" font.
	        title: Chart title

	    Returns:
	        Plotly Figure object, 400 px high, with the y axis spanning 0-105.
	    """
	    # (score key, Arabic label, English label) for each plotted dialect.
	    dialect_table = (
	        ("msa", "الفصحى", "MSA"),
	        ("egyptian", "المصري", "Egyptian"),
	        ("gulf", "الخليجي", "Gulf"),
	        ("levantine", "الشامي", "Levantine"),
	    )
	    dialect_keys = [key for key, _, _ in dialect_table]
	    # The x labels are the same for every model, so build them once.
	    x_labels = [
	        arabic if use_arabic else english
	        for _, arabic, english in dialect_table
	    ]

	    fig = go.Figure()

	    for position, (model_name, scores) in enumerate(model_scores.items()):
	        fig.add_trace(go.Bar(
	            name=model_name,
	            x=x_labels,
	            y=[scores.get(key, 0) for key in dialect_keys],
	            marker_color=MODEL_COLORS[position % len(MODEL_COLORS)]
	        ))

	    if use_arabic:
	        x_title = "اللهجة"
	        y_title = "الدقة (%)"
	        font_family = "Noto Kufi Arabic, Arial"
	    else:
	        x_title = "Dialect"
	        y_title = "Accuracy (%)"
	        font_family = "Arial"

	    fig.update_layout(
	        barmode='group',
	        title=dict(text=title, font=dict(size=16)),
	        xaxis=dict(title=x_title),
	        yaxis=dict(title=y_title, range=[0, 105]),
	        font=dict(family=font_family),
	        height=400
	    )

	    return fig


	def create_progress_over_time(
	    history_data: List[Dict],
	    models: Optional[List[str]] = None,
	    title: str = "Performance Over Time"
	) -> go.Figure:
	    """
	    Create a line chart showing model performance over time.

	    The snapshots are loaded into a DataFrame and drawn with ``px.line``
	    using the 'date' column for x, 'overall' for y and 'model' for color.

	    Args:
	        history_data: List of evaluation snapshots with dates. Rows may be
	            given in any order: when every 'date' value can be parsed as a
	            date, rows are plotted chronologically; otherwise the given
	            order is kept.
	        models: Models to include. None keeps every model.
	        title: Chart title

	    Returns:
	        Plotly Figure object, 400 px high, with the y axis fixed to 0-100.
	        When ``history_data`` is empty, or the ``models`` filter leaves no
	        rows, the figure has only the title and no traces.
	    """
	    def _empty_figure() -> go.Figure:
	        # Title-only placeholder used whenever there is nothing to plot.
	        placeholder = go.Figure()
	        placeholder.update_layout(title=title)
	        return placeholder

	    if not history_data:
	        return _empty_figure()

	    df = pd.DataFrame(history_data)

	    if models is not None:
	        df = df[df['model'].isin(models)]

	    if df.empty:
	        # The filter matched no rows; treat it like empty input.
	        return _empty_figure()

	    # Record the order in which models first appear BEFORE any re-sorting, so
	    # the legend order and color assignment stay what they were for the
	    # caller's row order.
	    category_orders = {}
	    if 'model' in df.columns:
	        category_orders['model'] = df['model'].drop_duplicates().tolist()

	    # px.line connects points in row order without re-sorting them, so
	    # out-of-order snapshots would draw lines that double back. Sort by the
	    # parsed date, but only when every value parses: a partially parseable
	    # column is left untouched rather than reordered on a guess. A missing
	    # 'date' column is left for px.line to report.
	    if 'date' in df.columns:
	        try:
	            parsed = pd.to_datetime(df['date'], errors='coerce')
	        except (TypeError, ValueError):
	            parsed = None
	        if parsed is not None and parsed.notna().all():
	            # mergesort is stable: rows sharing a date keep their order.
	            order = (
	                parsed.reset_index(drop=True)
	                .sort_values(kind='mergesort')
	                .index
	            )
	            df = df.iloc[order]

	    fig = px.line(
	        df,
	        x='date',
	        y='overall',
	        color='model',
	        title=title,
	        labels={'overall': 'Overall Score (%)', 'date': 'Date', 'model': 'Model'},
	        category_orders=category_orders
	    )

	    fig.update_layout(
	        yaxis=dict(range=[0, 100]),
	        height=400
	    )

	    return fig