File size: 1,893 Bytes
25bcc11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

"""Base class for evaluation harnesses."""

from abc import ABC, abstractmethod
from typing import Any, Dict

from openenv.core.evals.types import EvalConfig, EvalResult


class EvalHarness(ABC):
    """Common interface shared by every evaluation harness.

    A concrete harness overrides run() with its evaluation logic;
    run_from_config() and the name property are built on top of it here.
    """

    @abstractmethod
    def run(
        self,
        harness_version: str,
        library_versions: Dict[str, str],
        dataset: str,
        eval_parameters: Dict[str, Any],
    ) -> Dict[str, Any]:
        """Execute one evaluation and report its scores.

        Args:
            harness_version: Version of the evaluation harness.
            library_versions: Versions of the libraries used in the evaluation.
            dataset: Name of the dataset to evaluate on.
            eval_parameters: Parameters for the evaluation.

        Returns:
            Dictionary of scores produced by the evaluation.

        Raises:
            NotImplementedError: If this base implementation is invoked
                directly instead of a subclass override.
        """
        raise NotImplementedError

    def run_from_config(self, config: EvalConfig) -> EvalResult:
        """Unpack an EvalConfig into a run() call and wrap the outcome.

        Args:
            config: Configuration for the evaluation; its harness_version,
                library_versions, dataset and eval_parameters attributes
                are forwarded to run() as keyword arguments.

        Returns:
            EvalResult built from the given config and the scores that
            run() returned.
        """
        # Collect the keyword arguments first so the call to run() below
        # stays a single short line.
        run_kwargs = {
            "harness_version": config.harness_version,
            "library_versions": config.library_versions,
            "dataset": config.dataset,
            "eval_parameters": config.eval_parameters,
        }
        outcome = self.run(**run_kwargs)
        return EvalResult(config=config, scores=outcome)

    @property
    def name(self) -> str:
        """Harness name, taken from the __name__ of the instance's class."""
        harness_cls = self.__class__
        return harness_cls.__name__