hhh / evaluators /base.py
github-actions[bot]
Deploy from GitHub Actions (commit: 8b247ffacd77c0672965b8378f1d52a7dcd187ae)
9366995
from __future__ import annotations

from abc import ABC, abstractmethod
from typing import ClassVar, List, Optional

from custom_types import Utterance, EvaluationResult
class Evaluator(ABC):
    """
    Base class for all evaluators.

    Each evaluator should compute exactly one metric, identified by
    ``METRIC_NAME``.
    """

    # Metric identifier. Concrete subclasses MUST override this with a
    # non-None string; enforced at construction time in __init__.
    # (Was annotated plain `str` with a None default — a type error and
    # an implied instance attribute; ClassVar[Optional[str]] is correct.)
    METRIC_NAME: ClassVar[Optional[str]] = None

    def __init__(self) -> None:
        super().__init__()
        # Fail fast: refuse to construct an evaluator that forgot to
        # declare which metric it computes.
        if self.METRIC_NAME is None:
            raise NotImplementedError(f"{self.__class__.__name__} must define METRIC_NAME")

    @abstractmethod
    def execute(self, conversation: List[Utterance], **kwargs) -> EvaluationResult:
        """
        Evaluate a conversation.

        Args:
            conversation: Full conversation as a list of utterances.
                Each utterance has keys: 'speaker', 'text'.
            **kwargs: Additional evaluator-specific parameters.

        Returns:
            EvaluationResult with one of three granularities:
              - "utterance": per_utterance contains scores for each utterance
              - "segment": per_segment contains scores for utterance groups
              - "conversation": overall contains aggregate scores for the
                entire conversation
        """
        ...