# Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. """Base class for evaluation harnesses.""" from abc import ABC, abstractmethod from typing import Any, Dict from openenv.core.evals.types import EvalConfig, EvalResult class EvalHarness(ABC): """Abstract base class for evaluation harnesses. Subclasses implement run() to define evaluation logic. """ @abstractmethod def run( self, harness_version: str, library_versions: Dict[str, str], dataset: str, eval_parameters: Dict[str, Any], ) -> Dict[str, Any]: """Run the evaluation and return scores. Args: harness_version: Version of the evaluation harness. library_versions: Versions of libraries used in the evaluation. dataset: Name of the dataset to evaluate on. eval_parameters: Parameters for the evaluation. Returns: Dictionary of scores from the evaluation. """ raise NotImplementedError def run_from_config(self, config: EvalConfig) -> EvalResult: """Run evaluation from an EvalConfig and return an EvalResult. Args: config: Configuration for the evaluation. Returns: EvalResult containing the config and scores. """ scores = self.run( harness_version=config.harness_version, library_versions=config.library_versions, dataset=config.dataset, eval_parameters=config.eval_parameters, ) return EvalResult(config=config, scores=scores) @property def name(self) -> str: """Return the name of the harness (class name).""" return self.__class__.__name__