Spaces:
Sleeping
Sleeping
File size: 8,449 Bytes
e3a1a9e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 |
import plotly.graph_objects as go
import pandas as pd
import numpy as np
from typing import Dict, List
class EvaluationVisualizer:
    """Builds Plotly figures for agent-evaluation results.

    Each result dict is expected to carry ``'agent_name'``, ``'task_id'`` and a
    ``'scores'`` dict keyed by the metric names listed in ``metric_colors``.
    Missing scores are treated as 0 throughout. Every method returns a
    ``go.Figure``; list-taking methods return a placeholder figure titled
    "No data ..." when given empty input instead of raising.
    """

    def __init__(self):
        # One fixed color per metric so all charts render consistently.
        self.metric_colors = {
            'instruction_following': '#667eea', 'hallucination_score': '#48bb78',
            'assumption_control': '#f6ad55', 'coherence': '#63b3ed',
            'accuracy': '#fc8181', 'overall_score': '#764ba2'
        }

    def _avg_metric_scores(self, results: List[Dict], metrics: List[str]) -> Dict[str, float]:
        """Mean of each metric in *metrics* over *results* (missing scores count as 0)."""
        return {m: float(np.mean([r['scores'].get(m, 0) for r in results])) for m in metrics}

    # Spider chart with multi-dimensional scores for a single evaluation
    def create_spider_chart(self, scores: Dict, agent_name: str = "Agent") -> go.Figure:
        """Radar (spider) chart of the five per-metric scores for one evaluation."""
        # Plotly renders line breaks in tick labels with '<br>', not '\n',
        # so use the HTML break to actually wrap the long labels.
        metrics = ['Instruction<br>Following', 'Hallucination<br>Control',
                   'Assumption<br>Control', 'Coherence', 'Accuracy']
        values = [
            scores.get('instruction_following', 0), scores.get('hallucination_score', 0),
            scores.get('assumption_control', 0), scores.get('coherence', 0),
            scores.get('accuracy', 0)
        ]
        fig = go.Figure()
        fig.add_trace(go.Scatterpolar(
            r=values, theta=metrics, fill='toself', name=agent_name,
            line=dict(color=self.metric_colors['instruction_following'])))
        fig.update_layout(
            polar=dict(radialaxis=dict(visible=True, range=[0, 1])),
            title=f"{agent_name} - Performance Spider Chart", template='plotly_white')
        return fig

    # Horizontal bar chart showing scores for a single evaluation
    def create_score_bars(self, scores: Dict, agent_name: str = "Agent") -> go.Figure:
        """Horizontal bar chart of every metric present in *scores* (x-range [0, 1])."""
        metric_map = {
            'overall_score': 'Overall Score', 'instruction_following': 'Instruction Following',
            'hallucination_score': 'Hallucination Control', 'assumption_control': 'Assumption Control',
            'coherence': 'Coherence', 'accuracy': 'Accuracy'
        }
        # Compute the present keys once so labels, values and colors stay aligned.
        present = [key for key in metric_map if key in scores]
        metrics = [metric_map[key] for key in present]
        values = [scores[key] for key in present]
        colors = [self.metric_colors.get(key, '#667eea') for key in present]
        fig = go.Figure(go.Bar(
            y=metrics, x=values, orientation='h', marker=dict(color=colors),
            text=[f'{v:.2f}' for v in values], textposition='auto'))
        fig.update_layout(title=f"{agent_name} - Score Breakdown",
                          xaxis=dict(range=[0, 1]), template='plotly_white', showlegend=False)
        return fig

    # Heatmap showing evaluation scores across agents and tasks
    def create_evaluation_heatmap(self, results: List[Dict]) -> go.Figure:
        """Heatmap of overall scores: agents on the y-axis, tasks on the x-axis."""
        if not results:
            return go.Figure().update_layout(title="No data for heatmap")
        df = pd.DataFrame([
            {'agent': r['agent_name'], 'task': r['task_id'],
             'score': r['scores'].get('overall_score', 0)}
            for r in results
        ])
        # pivot_table averages repeated (agent, task) pairs, where df.pivot
        # would raise ValueError on duplicates.
        pivot_df = df.pivot_table(index='agent', columns='task', values='score', aggfunc='mean')
        fig = go.Figure(data=go.Heatmap(
            z=pivot_df.values, x=pivot_df.columns, y=pivot_df.index,
            colorscale='Viridis', colorbar=dict(title="Score")))
        fig.update_layout(title="Agent Performance Heatmap", xaxis_title="Tasks",
                          yaxis_title="Agents", template='plotly_white')
        return fig

    # Violin plots for spread in scores across agents
    def create_score_distribution(self, results: List[Dict]) -> go.Figure:
        """One violin plot per metric showing the score spread across all results."""
        if not results:
            return go.Figure().update_layout(title="No data for distribution plot")
        rows = []
        for r in results:
            entry = {'Agent': r.get('agent_name', 'Unknown')}
            entry.update(r['scores'])
            rows.append(entry)
        df = pd.DataFrame(rows)
        # Only melt metric columns that actually appear in the data;
        # melt raises KeyError for value_vars absent from the frame.
        present = [m for m in self.metric_colors if m in df.columns]
        df = df.melt(id_vars=['Agent'], value_vars=present,
                     var_name='Metric', value_name='Score')
        df['Metric'] = df['Metric'].map({m: m.replace('_', ' ').title() for m in present})
        fig = go.Figure()
        for metric in df['Metric'].unique():
            fig.add_trace(go.Violin(y=df[df['Metric'] == metric]['Score'], name=metric,
                                    box_visible=True, meanline_visible=True))
        fig.update_layout(title="Score Distribution Analysis", yaxis_title="Score",
                          template='plotly_white', showlegend=False)
        return fig

    # Bar chart showing average overall scores of each agent
    def create_performance_trends(self, results: List[Dict]) -> go.Figure:
        """Bar chart of each agent's mean overall score.

        NOTE: kept under its historical name for callers; this is an
        average-per-agent chart, not a time-series trend.
        """
        if not results:
            return go.Figure().update_layout(title="No data for average performance plot")
        agent_scores: Dict[str, List[float]] = {}
        for r in results:
            agent_scores.setdefault(r['agent_name'], []).append(
                r['scores'].get('overall_score', 0))
        avg_scores = {agent: np.mean(vals) for agent, vals in agent_scores.items()}
        fig = go.Figure(go.Bar(
            x=list(avg_scores.keys()),
            y=list(avg_scores.values()),
            text=[f"{v:.2f}" for v in avg_scores.values()],
            textposition="auto",
            marker=dict(color="#667eea")
        ))
        fig.update_layout(
            title="Average Overall Scores by Agent",
            xaxis_title="Agents",
            yaxis_title="Average Overall Score",
            template="plotly_white"
        )
        return fig

    # Comparison chart between two agents
    def create_agent_comparison(self, agent1_results: List[Dict], agent2_results: List[Dict]) -> go.Figure:
        """Grouped bar chart comparing two agents' average scores on every metric."""
        if not agent1_results or not agent2_results:
            # Match the other methods' empty-input behavior instead of IndexError.
            return go.Figure().update_layout(title="No data for agent comparison")
        metrics = list(self.metric_colors.keys())
        agent1_name = agent1_results[0].get('agent_name', 'Agent 1')
        agent2_name = agent2_results[0].get('agent_name', 'Agent 2')
        avg1 = self._avg_metric_scores(agent1_results, metrics)
        avg2 = self._avg_metric_scores(agent2_results, metrics)
        metric_labels = [m.replace('_', ' ').title() for m in metrics]
        fig = go.Figure(data=[
            go.Bar(name=agent1_name, x=metric_labels, y=[avg1[m] for m in metrics]),
            go.Bar(name=agent2_name, x=metric_labels, y=[avg2[m] for m in metrics])
        ])
        fig.update_layout(barmode='group', title="Agent Performance Comparison",
                          yaxis_title="Average Score", template='plotly_white')
        return fig

    # Spider chart comparing two agents
    def create_radar_comparison(self, agent1_results: List[Dict], agent2_results: List[Dict]) -> go.Figure:
        """Overlaid radar chart of two agents' average scores on the five core metrics."""
        if not agent1_results or not agent2_results:
            return go.Figure().update_layout(title="No data for radar comparison")
        metrics = ['instruction_following', 'hallucination_score',
                   'assumption_control', 'coherence', 'accuracy']
        metric_labels = [m.replace('_', ' ').title() for m in metrics]
        avg1 = self._avg_metric_scores(agent1_results, metrics)
        avg2 = self._avg_metric_scores(agent2_results, metrics)
        fig = go.Figure()
        fig.add_trace(go.Scatterpolar(
            r=[avg1[m] for m in metrics], theta=metric_labels, fill='toself',
            name=agent1_results[0].get('agent_name', 'Agent 1')))
        fig.add_trace(go.Scatterpolar(
            r=[avg2[m] for m in metrics], theta=metric_labels, fill='toself',
            name=agent2_results[0].get('agent_name', 'Agent 2')))
        fig.update_layout(polar=dict(radialaxis=dict(visible=True, range=[0, 1])),
                          title="Agent Comparison - Radar Chart", template='plotly_white')
        return fig

    # Performance gap between two agents across metrics
    def create_performance_delta(self, agent1_results: List[Dict], agent2_results: List[Dict]) -> go.Figure:
        """Bar chart of (agent 2 − agent 1) average score per metric.

        Bars are green where agent 2 is ahead (delta >= 0), red otherwise.
        """
        if not agent1_results or not agent2_results:
            return go.Figure().update_layout(title="No data for performance delta")
        metrics = list(self.metric_colors.keys())
        avg1 = self._avg_metric_scores(agent1_results, metrics)
        avg2 = self._avg_metric_scores(agent2_results, metrics)
        deltas = [avg2[m] - avg1[m] for m in metrics]
        colors = ['#48bb78' if d >= 0 else '#fc8181' for d in deltas]
        metric_labels = [m.replace('_', ' ').title() for m in metrics]
        fig = go.Figure(go.Bar(x=metric_labels, y=deltas, marker_color=colors,
                               text=[f'{d:+.2f}' for d in deltas]))
        fig.update_layout(title="Performance Delta (Agent 2 vs Agent 1)",
                          yaxis_title="Score Difference", template='plotly_white')
        return fig