File size: 1,674 Bytes
cacd4d0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
"""
Base evaluator class for all evaluation strategies.
"""

from abc import ABC, abstractmethod
from typing import Any, Dict, Optional
import logging

# Module-level logger named after this module. NOTE(review): nothing in the
# visible code uses it (BaseEvaluator creates its own per-class logger) —
# confirm whether other code imports it before removing.
logger = logging.getLogger(__name__)

class BaseEvaluator(ABC):
    """
    Common contract shared by every evaluation strategy.

    Concrete evaluators supply the scoring logic by implementing
    ``evaluate``. This base class only stores the metric weights, creates a
    logger for the concrete class, and offers a sanity check on the weights.
    """

    def __init__(self, metric_weights: Optional[Dict[str, float]] = None):
        """
        Store the metric weights and create a logger for the concrete class.

        Args:
            metric_weights: Mapping of metric name to weight. ``None`` or an
                empty mapping is replaced by a fresh empty dict; a non-empty
                mapping is stored as-is (it is not copied). Subclasses are
                expected to provide their own defaults when none are given.
        """
        # The logger is named "<module>.<ConcreteClassName>", so every
        # subclass logs under its own name.
        concrete_name = self.__class__.__name__
        self.logger = logging.getLogger(f"{__name__}.{concrete_name}")
        self.metric_weights = metric_weights if metric_weights else {}

    @abstractmethod
    def evaluate(self, predicted: Any, expected: Any) -> Dict[str, float]:
        """
        Score a predicted output against the expected output.

        Args:
            predicted: Output produced by the model.
            expected: Ground-truth output to compare against.

        Returns:
            Mapping of metric name to score. Implementations must include a
            'composite_score' entry for GEPA integration.
        """

    def validate_weights(self) -> bool:
        """
        Check that the metric weights add up to roughly 1.0.

        Returns:
            True when no weights are configured, or when their sum differs
            from 1.0 by less than 0.01; False otherwise.
        """
        weights = self.metric_weights
        if weights:
            # A tolerance of 0.01 absorbs small floating point errors.
            deviation = abs(sum(weights.values()) - 1.0)
            return deviation < 0.01
        # Nothing configured means there is nothing to validate.
        return True