File size: 1,221 Bytes
cff1e0e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
from abc import ABC, abstractmethod
from typing import List
from custom_types import Utterance, EvaluationResult


class Evaluator(ABC):
    """
    Abstract parent of every evaluator.

    A concrete evaluator is responsible for exactly one metric: it names
    that metric through ``METRIC_NAME`` and computes it in ``execute``.
    """

    # Placeholder only: a concrete subclass overrides this with the name of
    # its metric. Leaving it as None makes the constructor below fail.
    METRIC_NAME: str = None

    def __init__(self):
        """
        Initialise the base class and verify that a metric name was set.

        Raises:
            NotImplementedError: If ``METRIC_NAME`` is still ``None`` on the
                instance being created.
        """
        super().__init__()
        metric_is_declared = self.METRIC_NAME is not None
        if metric_is_declared:
            return
        # Name the offending subclass so the error points at the right class.
        owner_name = self.__class__.__name__
        raise NotImplementedError(f"{owner_name} must define METRIC_NAME")

    @abstractmethod
    def execute(self, conversation: List[Utterance], **kwargs) -> EvaluationResult:
        """
        Score a conversation for this evaluator's metric.

        Args:
            conversation: The complete conversation, given as a list of
                utterances; each utterance carries the keys 'speaker'
                and 'text'.
            **kwargs: Extra parameters specific to the concrete evaluator.

        Returns:
            An EvaluationResult at one of three granularities:
            - "utterance": per_utterance holds scores for each utterance
            - "segment": per_segment holds scores for groups of utterances
            - "conversation": overall holds aggregate scores for the whole
              conversation
        """
        pass