github-actions[bot]
Deploy from GitHub Actions (commit: 8b247ffacd77c0672965b8378f1d52a7dcd187ae)
9366995
from abc import ABC, abstractmethod
from typing import List, Optional

from custom_types import Utterance, EvaluationResult
class Evaluator(ABC):
    """
    Base class for all evaluators.

    Each evaluator should compute exactly one metric. A concrete subclass
    must set ``METRIC_NAME`` and implement ``execute``.
    """

    # Subclasses should define this. None means "not set" and is rejected
    # by __init__.
    METRIC_NAME: Optional[str] = None

    def __init__(self):
        """
        Check that the concrete class declares its metric name.

        Raises:
            NotImplementedError: If METRIC_NAME is still None on the class
                being instantiated.
        """
        super().__init__()
        if self.METRIC_NAME is None:
            raise NotImplementedError(f"{self.__class__.__name__} must define METRIC_NAME")

    # Abstract so that a subclass which forgets to override execute() fails
    # at instantiation instead of silently returning None from this stub.
    @abstractmethod
    def execute(self, conversation: List[Utterance], **kwargs) -> EvaluationResult:
        """
        Evaluate a conversation.

        Args:
            conversation: Full conversation as list of utterances.
                Each utterance has keys: 'speaker', 'text'.
            **kwargs: Additional evaluator-specific parameters

        Returns:
            EvaluationResult with one of three granularities:
            - "utterance": per_utterance contains scores for each utterance
            - "segment": per_segment contains scores for utterance groups
            - "conversation": overall contains aggregate scores for entire conversation
        """
        ...