# Plotly-based visualization helpers for agent evaluation results.
from typing import Dict, List

import numpy as np
import pandas as pd
import plotly.graph_objects as go
class EvaluationVisualizer:
    """Builds Plotly figures for visualizing agent evaluation results."""

    def __init__(self):
        # One fixed color per metric. Insertion order doubles as display
        # order in the bar/delta charts, so do not reorder these keys.
        self.metric_colors = {
            'instruction_following': '#667eea',
            'hallucination_score': '#48bb78',
            'assumption_control': '#f6ad55',
            'coherence': '#63b3ed',
            'accuracy': '#fc8181',
            'overall_score': '#764ba2',
        }
| #Spider chart with multi dimensional scores for single evaluation | |
| def create_spider_chart(self, scores: Dict, agent_name: str = "Agent") -> go.Figure: | |
| metrics = ['Instruction\nFollowing', 'Hallucination\nControl', 'Assumption\nControl', 'Coherence', 'Accuracy'] | |
| values = [ | |
| scores.get('instruction_following', 0), scores.get('hallucination_score', 0), | |
| scores.get('assumption_control', 0), scores.get('coherence', 0), | |
| scores.get('accuracy', 0) | |
| ] | |
| fig = go.Figure() | |
| fig.add_trace(go.Scatterpolar(r=values, theta=metrics, fill='toself', name=agent_name, line=dict(color=self.metric_colors['instruction_following']))) | |
| fig.update_layout(polar=dict(radialaxis=dict(visible=True, range=[0, 1])), title=f"{agent_name} - Performance Spider Chart", template='plotly_white') | |
| return fig | |
| # Horizontal bar chart showing scores for single evaluation | |
| def create_score_bars(self, scores: Dict, agent_name: str = "Agent") -> go.Figure: | |
| metric_map = { | |
| 'overall_score': 'Overall Score', 'instruction_following': 'Instruction Following', | |
| 'hallucination_score': 'Hallucination Control', 'assumption_control': 'Assumption Control', | |
| 'coherence': 'Coherence', 'accuracy': 'Accuracy' | |
| } | |
| metrics = [label for key, label in metric_map.items() if key in scores] | |
| values = [scores[key] for key in metric_map if key in scores] | |
| colors = [self.metric_colors.get(key, '#667eea') for key in metric_map if key in scores] | |
| fig = go.Figure(go.Bar(y=metrics, x=values, orientation='h', marker=dict(color=colors), text=[f'{v:.2f}' for v in values], textposition='auto')) | |
| fig.update_layout(title=f"{agent_name} - Score Breakdown", xaxis=dict(range=[0, 1]), template='plotly_white', showlegend=False) | |
| return fig | |
| # Heatmap showing evaluation scores across agents and tasks | |
| def create_evaluation_heatmap(self, results: List[Dict]) -> go.Figure: | |
| if not results: return go.Figure().update_layout(title="No data for heatmap") | |
| df_data = [{'agent': r['agent_name'], 'task': r['task_id'], 'score': r['scores'].get('overall_score', 0)} for r in results] | |
| df = pd.DataFrame(df_data) | |
| pivot_df = df.pivot(index='agent', columns='task', values='score') | |
| fig = go.Figure(data=go.Heatmap(z=pivot_df.values, x=pivot_df.columns, y=pivot_df.index, colorscale='Viridis', colorbar=dict(title="Score"))) | |
| fig.update_layout(title="Agent Performance Heatmap", xaxis_title="Tasks", yaxis_title="Agents", template='plotly_white') | |
| return fig | |
| # Violin plots for spread in scores across Agents | |
| def create_score_distribution(self, results: List[Dict]) -> go.Figure: | |
| if not results: return go.Figure().update_layout(title="No data for distribution plot") | |
| df_data = [] | |
| for r in results: | |
| entry = {'Agent': r.get('agent_name', 'Unknown')} | |
| entry.update(r['scores']) | |
| df_data.append(entry) | |
| df = pd.DataFrame(df_data).melt(id_vars=['Agent'], value_vars=self.metric_colors.keys(), var_name='Metric', value_name='Score') | |
| metric_map = {k: k.replace('_', ' ').title() for k in self.metric_colors.keys()} | |
| df['Metric'] = df['Metric'].map(metric_map) | |
| fig = go.Figure() | |
| for metric in df['Metric'].unique(): | |
| fig.add_trace(go.Violin(y=df[df['Metric'] == metric]['Score'], name=metric, box_visible=True, meanline_visible=True)) | |
| fig.update_layout(title="Score Distribution Analysis", yaxis_title="Score", template='plotly_white', showlegend=False) | |
| return fig | |
| # Bar chart showing average overall scores of each agent | |
| def create_performance_trends(self, results: List[Dict]) -> go.Figure: | |
| if not results: | |
| return go.Figure().update_layout(title="No data for average performance plot") | |
| agent_scores = {} | |
| for r in results: | |
| agent = r['agent_name'] | |
| if agent not in agent_scores: | |
| agent_scores[agent] = [] | |
| agent_scores[agent].append(r['scores'].get('overall_score', 0)) | |
| # Compute averages | |
| avg_scores = {agent: np.mean(scores) for agent, scores in agent_scores.items()} | |
| fig = go.Figure(go.Bar( | |
| x=list(avg_scores.keys()), | |
| y=list(avg_scores.values()), | |
| text=[f"{v:.2f}" for v in avg_scores.values()], | |
| textposition="auto", | |
| marker=dict(color="#667eea") | |
| )) | |
| fig.update_layout( | |
| title="Average Overall Scores by Agent", | |
| xaxis_title="Agents", | |
| yaxis_title="Average Overall Score", | |
| template="plotly_white" | |
| ) | |
| return fig | |
| # Comparison chart between two agents | |
| def create_agent_comparison(self, agent1_results: List[Dict], agent2_results: List[Dict]) -> go.Figure: | |
| metrics = list(self.metric_colors.keys()) | |
| agent1_name = agent1_results[0].get('agent_name', 'Agent 1') | |
| agent2_name = agent2_results[0].get('agent_name', 'Agent 2') | |
| def get_avg_scores(results): | |
| return {m: np.mean([r['scores'].get(m, 0) for r in results]) for m in metrics} | |
| avg1 = get_avg_scores(agent1_results) | |
| avg2 = get_avg_scores(agent2_results) | |
| metric_labels = [m.replace('_', ' ').title() for m in metrics] | |
| fig = go.Figure(data=[ | |
| go.Bar(name=agent1_name, x=metric_labels, y=[avg1[m] for m in metrics]), | |
| go.Bar(name=agent2_name, x=metric_labels, y=[avg2[m] for m in metrics]) | |
| ]) | |
| fig.update_layout(barmode='group', title="Agent Performance Comparison", yaxis_title="Average Score", template='plotly_white') | |
| return fig | |
| # Spider chart comparing two agents | |
| def create_radar_comparison(self, agent1_results: List[Dict], agent2_results: List[Dict]) -> go.Figure: | |
| metrics = ['instruction_following', 'hallucination_score', 'assumption_control', 'coherence', 'accuracy'] | |
| metric_labels = [m.replace('_', ' ').title() for m in metrics] | |
| def get_avg_scores(results): | |
| return [np.mean([r['scores'].get(m, 0) for r in results]) for m in metrics] | |
| agent1_values = get_avg_scores(agent1_results) | |
| agent2_values = get_avg_scores(agent2_results) | |
| fig = go.Figure() | |
| fig.add_trace(go.Scatterpolar(r=agent1_values, theta=metric_labels, fill='toself', name=agent1_results[0].get('agent_name', 'Agent 1'))) | |
| fig.add_trace(go.Scatterpolar(r=agent2_values, theta=metric_labels, fill='toself', name=agent2_results[0].get('agent_name', 'Agent 2'))) | |
| fig.update_layout(polar=dict(radialaxis=dict(visible=True, range=[0, 1])), title="Agent Comparison - Radar Chart", template='plotly_white') | |
| return fig | |
| #performance gap between two agents across metrics | |
| def create_performance_delta(self, agent1_results: List[Dict], agent2_results: List[Dict]) -> go.Figure: | |
| metrics = list(self.metric_colors.keys()) | |
| def get_avg_scores(results): | |
| return {m: np.mean([r['scores'].get(m, 0) for r in results]) for m in metrics} | |
| avg1 = get_avg_scores(agent1_results) | |
| avg2 = get_avg_scores(agent2_results) | |
| deltas = [avg2[m] - avg1[m] for m in metrics] | |
| colors = ['#48bb78' if d >= 0 else '#fc8181' for d in deltas] | |
| metric_labels = [m.replace('_', ' ').title() for m in metrics] | |
| fig = go.Figure(go.Bar(x=metric_labels, y=deltas, marker_color=colors, text=[f'{d:+.2f}' for d in deltas])) | |
| fig.update_layout(title="Performance Delta (Agent 2 vs Agent 1)", yaxis_title="Score Difference", template='plotly_white') | |
| return fig |