File size: 1,221 Bytes
cff1e0e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 |
from abc import ABC, abstractmethod
from typing import List, Optional

from custom_types import Utterance, EvaluationResult
class Evaluator(ABC):
    """
    Base class for all evaluators.

    Each evaluator should compute exactly one metric. A concrete subclass
    must override ``METRIC_NAME`` and implement ``execute``.
    """

    # Subclasses should define this. The base class leaves it as None so that
    # __init__ can detect a subclass that forgot to override it.
    METRIC_NAME: Optional[str] = None

    def __init__(self):
        """
        Initialise the evaluator and check that a metric name was declared.

        Raises:
            NotImplementedError: If the concrete class leaves METRIC_NAME
                as None.
        """
        super().__init__()
        if self.METRIC_NAME is None:
            raise NotImplementedError(f"{self.__class__.__name__} must define METRIC_NAME")

    @abstractmethod
    def execute(self, conversation: List[Utterance], **kwargs) -> EvaluationResult:
        """
        Evaluate a conversation.

        Args:
            conversation: Full conversation as list of utterances.
                Each utterance has keys: 'speaker', 'text'.
            **kwargs: Additional evaluator-specific parameters

        Returns:
            EvaluationResult with one of three granularities:
            - "utterance": per_utterance contains scores for each utterance
            - "segment": per_segment contains scores for utterance groups
            - "conversation": overall contains aggregate scores for entire conversation
        """
        ...