"""
Helper functions for creating evaluation results in the standardized format.
"""
from typing import List, Optional
from custom_types import (
    Utterance, EvaluationResult, UtteranceScore, SegmentScore,
    CategoricalScore, NumericalScore, MetricScore
)


def create_categorical_score(
    label: str,
    confidence: Optional[float] = None
) -> CategoricalScore:
    """
    Create a categorical score.
    
    Args:
        label: Category label (e.g., "High", "Change", "Positive")
        confidence: Optional confidence score 0-1
        
    Returns:
        CategoricalScore
    """
    return {
        "type": "categorical",
        "label": label,
        "confidence": confidence
    }
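
# Illustrative usage (hypothetical values; any label string the evaluation
# scheme defines is acceptable):
#
#   score = create_categorical_score("Positive", confidence=0.92)
#   # -> {"type": "categorical", "label": "Positive", "confidence": 0.92}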


def create_numerical_score(
    value: float,
    max_value: float,
    label: Optional[str] = None
) -> NumericalScore:
    """
    Create a numerical score.
    
    Args:
        value: The score value
        max_value: Maximum possible score
        label: Optional derived label (e.g., "High" if value > threshold)
        
    Returns:
        NumericalScore
    """
    return {
        "type": "numerical",
        "value": value,
        "max_value": max_value,
        "label": label
    }
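
# Illustrative usage (hypothetical values; the label is supplied by the
# caller, e.g. via derive_label_from_score below):
#
#   score = create_numerical_score(4.0, max_value=5.0, label="High")
#   # -> {"type": "numerical", "value": 4.0, "max_value": 5.0, "label": "High"}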


def create_utterance_result(
    conversation: List[Utterance],
    scores_per_utterance: List[dict[str, MetricScore]]
) -> EvaluationResult:
    """
    Create an utterance-level evaluation result.
    
    Args:
        conversation: The full conversation
        scores_per_utterance: List of metric scores, one dict per utterance
        
    Returns:
        EvaluationResult with granularity="utterance"
        
    Raises:
        ValueError: If the number of score dicts does not match the
            number of utterances in the conversation.
    """
    if len(scores_per_utterance) != len(conversation):
        raise ValueError(
            "Expected one score dict per utterance, got "
            f"{len(scores_per_utterance)} score dicts for "
            f"{len(conversation)} utterances"
        )
    
    per_utterance: List[UtteranceScore] = []
    for i, scores in enumerate(scores_per_utterance):
        per_utterance.append({
            "index": i,
            "metrics": scores
        })
    
    return {
        "granularity": "utterance",
        "overall": None,
        "per_utterance": per_utterance,
        "per_segment": None
    }
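
# Illustrative usage (a sketch: it assumes Utterance is a speaker/text
# mapping, which may differ from the actual definition in custom_types,
# and uses a hypothetical "sentiment" metric):
#
#   conversation = [
#       {"speaker": "agent", "text": "How can I help?"},
#       {"speaker": "customer", "text": "My order never arrived."},
#   ]
#   result = create_utterance_result(conversation, [
#       {"sentiment": create_categorical_score("Neutral")},
#       {"sentiment": create_categorical_score("Negative", confidence=0.85)},
#   ])
#   # result["per_utterance"][1] -> {"index": 1, "metrics": {"sentiment": ...}}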


def create_conversation_result(
    overall_scores: dict[str, MetricScore]
) -> EvaluationResult:
    """
    Create a conversation-level evaluation result.
    
    Args:
        overall_scores: Aggregate scores for the entire conversation
        
    Returns:
        EvaluationResult with granularity="conversation"
    """
    return {
        "granularity": "conversation",
        "overall": overall_scores,
        "per_utterance": None,
        "per_segment": None
    }
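
# Illustrative usage (hypothetical "resolution" metric):
#
#   result = create_conversation_result({
#       "resolution": create_categorical_score("Resolved", confidence=0.9),
#   })
#   # result["granularity"] -> "conversation"; the per_utterance and
#   # per_segment fields are None at this granularity.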


def create_segment_result(
    segments: List[tuple[List[int], dict[str, MetricScore]]]
) -> EvaluationResult:
    """
    Create a segment-level evaluation result.
    
    Args:
        segments: List of (utterance_indices, scores) tuples
        
    Returns:
        EvaluationResult with granularity="segment"
    """
    per_segment: List[SegmentScore] = []
    for utterance_indices, scores in segments:
        per_segment.append({
            "utterance_indices": utterance_indices,
            "metrics": scores
        })
    
    return {
        "granularity": "segment",
        "overall": None,
        "per_utterance": None,
        "per_segment": per_segment
    }
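
# Illustrative usage (a hypothetical split into two topical segments
# covering utterances 0-2 and 3-4):
#
#   result = create_segment_result([
#       ([0, 1, 2], {"topic": create_categorical_score("Billing")}),
#       ([3, 4], {"topic": create_categorical_score("Shipping")}),
#   ])
#   # result["per_segment"][0]["utterance_indices"] -> [0, 1, 2]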


def derive_label_from_score(value: float, max_value: float, thresholds: Optional[dict] = None) -> str:
    """
    Derive a categorical label from a numerical score.
    
    Args:
        value: The score value
        max_value: Maximum possible score
        thresholds: Optional custom thresholds. Default: {"Low": 0.33, "Medium": 0.66, "High": 1.0}
        
    Returns:
        Label string
    """
    if thresholds is None:
        thresholds = {"Low": 0.33, "Medium": 0.66, "High": 1.0}
    
    ratio = value / max_value
    # Walk the thresholds in ascending order and return the first bracket
    # the ratio falls into.
    for label, threshold in sorted(thresholds.items(), key=lambda x: x[1]):
        if ratio <= threshold:
            return label
    
    # Fallback for ratios above every threshold (e.g. value > max_value):
    # return the label with the highest threshold rather than relying on
    # the dict's insertion order.
    return max(thresholds.items(), key=lambda x: x[1])[0]
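
# Illustrative usage with the default thresholds (ratio = value / max_value):
#
#   derive_label_from_score(1.0, 5.0)  # ratio 0.20 -> "Low"
#   derive_label_from_score(2.5, 5.0)  # ratio 0.50 -> "Medium"
#   derive_label_from_score(4.5, 5.0)  # ratio 0.90 -> "High"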