File size: 6,254 Bytes
71a764a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36d5e94
71a764a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
"""Statistics calculation and visualization functionality"""

import matplotlib.pyplot as plt
import numpy as np
from typing import List, Dict, Optional, Tuple


class StatisticsCalculator:
    """Handles statistical calculations and visualization generation.

    The class keeps no instance state; every method works only on its
    arguments.
    """

    def get_score_statistics(self, scores):
        """Calculate mean and standard deviation of scores.

        Args:
            scores: Sized collection of numeric scores (list, tuple,
                NumPy array, ...). May be ``None`` or empty.

        Returns:
            A ``(mean, std)`` tuple computed with ``np.mean`` / ``np.std``
            (NumPy's default, i.e. population standard deviation), or
            ``(None, None)`` when there are no scores.
        """
        # Use len() instead of truthiness: `not scores` raises ValueError
        # for NumPy arrays holding more than one element.
        if scores is None or len(scores) == 0:
            return None, None
        return np.mean(scores), np.std(scores)

    def create_violin_plot(self, prompt_results, user_score, user_tokens):
        """Create horizontal violin plots stacked vertically by token count.

        One subplot is drawn per token count (1 through 5) that has at least
        one score. The subplot matching ``user_tokens`` uses a different
        colour and is marked with the user's score.

        Args:
            prompt_results: Iterable of result dicts; each must provide the
                keys ``"num_user_tokens"`` and ``"cosine_distance"``.
            user_score: The user's score, drawn as a star on the matching
                subplot. When ``None`` the marker is omitted.
            user_tokens: The user's token count, used to pick which subplot
                is highlighted.

        Returns:
            The Matplotlib figure. It is created through ``pyplot``, so the
            caller is responsible for closing it when done.
        """
        token_counts = (1, 2, 3, 4, 5)

        # Group scores by token count in a single pass over the results;
        # results with any other token count are ignored.
        scores_by_count = {count: [] for count in token_counts}
        for result in prompt_results:
            bucket = scores_by_count.get(result["num_user_tokens"])
            if bucket is not None:
                bucket.append(result["cosine_distance"])

        # Keep only token counts that actually have data (ascending order).
        token_data = [(count, scores_by_count[count])
                      for count in token_counts if scores_by_count[count]]

        if not token_data:
            fig, ax = plt.subplots(figsize=(10, 4))
            ax.text(0.5, 0.5, 'No data available for visualization',
                    ha='center', va='center', transform=ax.transAxes, fontsize=14)
            ax.set_title('Score Distribution by Token Count', fontsize=14, fontweight='bold')
            return fig

        # One row per token count. squeeze=False always yields a 2-D array of
        # axes, so the single-subplot case needs no special handling.
        fig, axes_grid = plt.subplots(len(token_data), 1,
                                      figsize=(10, 1 * len(token_data)),
                                      sharex=True, squeeze=False)
        axes = axes_grid[:, 0]

        for ax, (token_count, scores) in zip(axes, token_data):
            is_user_row = token_count == user_tokens

            # Create horizontal violin plot
            parts = ax.violinplot([scores], positions=[0], vert=False,
                                  showmeans=True, showextrema=True)

            # Colour depends on whether this row is the user's token count.
            color = '#667eea' if is_user_row else '#764ba2'
            for body in parts['bodies']:
                body.set_facecolor(color)
                body.set_alpha(0.7)
                body.set_edgecolor('black')
                body.set_linewidth(1)

            # Highlight the user's score on their own row. Skipped when no
            # score is available, because formatting None with ':.3f' raises.
            if is_user_row and user_score is not None:
                ax.scatter(user_score, 0, color='red', s=150, zorder=5,
                           marker='*', label=f'Your Score: {user_score:.3f}')
                ax.legend(loc='upper right')

            # Styling for each subplot
            ax.set_ylabel(f'{token_count} token{"s" if token_count != 1 else ""}\n(n={len(scores)})',
                          fontsize=11, fontweight='bold')
            ax.set_yticks([])
            ax.grid(True, alpha=0.3, axis='x')
            for side in ('top', 'right', 'left'):
                ax.spines[side].set_visible(False)

            # Same y-limits on every row for visual consistency
            ax.set_ylim(-0.4, 0.4)

        # Common x-label only on the bottom subplot
        axes[-1].set_xlabel('Creativity Score (Cosine Distance)', fontsize=12)

        # Overall title
        fig.suptitle('Score Distribution by Token Count', fontsize=14, fontweight='bold', y=0.98)

        # Lay out this figure explicitly instead of relying on pyplot's
        # implicit "current figure".
        fig.tight_layout()
        fig.subplots_adjust(top=0.92)
        return fig

    def calculate_session_ranking_stats(self, session_results, data_manager, scorer):
        """Calculate comprehensive ranking statistics for the session.

        Args:
            session_results: Sequence of result dicts for the current
                session. Each dict is read for the keys ``"prompt"``,
                ``"llm_partial_response"``, ``"cosine_distance"``,
                ``"num_user_tokens"`` and ``"timestamp"``. The trend
                calculation presumably expects chronological order —
                verify against the caller.
            data_manager: Object providing ``get_results()`` and
                ``filter_results_by_partial_response(results, prompt,
                partial_response)``.
            scorer: Object providing ``calculate_rank_and_percentile(score,
                prompt_results, num_user_tokens)`` returning
                ``(rank, percentile)``.

        Returns:
            A dict with the keys ``best_rank``, ``best_percentile``,
            ``average_percentile``, ``total_ranked_attempts``,
            ``ranking_trend`` (``"up"``, ``"down"`` or ``"stable"``) and
            ``recent_percentiles`` (one entry per ranked attempt). When no
            attempt could be ranked, the defaults are returned unchanged.
        """
        ranking_stats = {
            "best_rank": None,
            "best_percentile": None,
            "average_percentile": 0.0,
            "total_ranked_attempts": 0,
            "ranking_trend": "stable",  # up, down, stable
            "recent_percentiles": []
        }

        # Nothing to rank: return the defaults without loading stored results.
        if not session_results:
            return ranking_stats

        all_results = data_manager.get_results()
        percentiles = []
        ranks = []

        # Calculate rankings for each session attempt
        for result in session_results:
            # Get all results for this specific prompt
            prompt_results = data_manager.filter_results_by_partial_response(
                all_results, result["prompt"], result["llm_partial_response"]
            )
            if len(prompt_results) < 2:  # Need at least 2 results to rank
                continue

            token_count = result["num_user_tokens"]
            rank, percentile = scorer.calculate_rank_and_percentile(
                result["cosine_distance"], prompt_results, token_count
            )
            # Skip attempts the scorer could not rank (falsy rank or no percentile).
            if not rank or percentile is None:
                continue

            percentiles.append(percentile)
            ranks.append(rank)
            ranking_stats["recent_percentiles"].append({
                "percentile": percentile,
                "rank": rank,
                # Number of stored results with the same token count.
                "total": sum(1 for r in prompt_results
                             if r["num_user_tokens"] == token_count),
                "timestamp": result["timestamp"]
            })

        if not percentiles:
            return ranking_stats

        ranking_stats["total_ranked_attempts"] = len(percentiles)
        ranking_stats["average_percentile"] = sum(percentiles) / len(percentiles)
        ranking_stats["best_percentile"] = max(percentiles)
        # ranks grows in lockstep with percentiles, so it is non-empty here.
        ranking_stats["best_rank"] = min(ranks)

        # Determine trend (compare first half vs second half); needs at
        # least 4 ranked attempts, otherwise the trend stays "stable".
        if len(percentiles) >= 4:
            mid_point = len(percentiles) // 2
            first_half_avg = sum(percentiles[:mid_point]) / mid_point
            second_half_avg = sum(percentiles[mid_point:]) / (len(percentiles) - mid_point)

            # A shift of more than 10 percentile points counts as a trend.
            if second_half_avg > first_half_avg + 10:
                ranking_stats["ranking_trend"] = "up"
            elif second_half_avg < first_half_avg - 10:
                ranking_stats["ranking_trend"] = "down"

        return ranking_stats