File size: 2,021 Bytes
336f4a9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import asyncio

from weave import Dataset, Evaluation, Model, Scorer


class Evaluator:
    """Evaluate a model on a dataset using a list of scorers.

    Thin synchronous wrapper around `weave.Evaluation`: it stores the pieces of
    an evaluation run and exposes a blocking `evaluate()` method that drives the
    asynchronous evaluation to completion.

    Attributes:
        evaluation_name (str): The name of the evaluation run.
        evaluation_dataset (Dataset): The dataset used for evaluation.
        evaluation_scorers (list[Scorer]): A list of scorer objects used to evaluate the pipeline.
        pipeline (Model): The pipeline (model) to be evaluated.
    """

    def __init__(
        self,
        evaluation_name: str,
        evaluation_dataset: Dataset,
        evaluation_scorers: list[Scorer],
        pipeline: Model,
    ):
        """Initialize the Evaluator instance with the specified evaluation parameters.

        The arguments are stored as-is; nothing is validated or executed until
        `evaluate()` is called.

        Args:
            evaluation_name (str): A unique identifier for the evaluation run.
            evaluation_dataset (Dataset): A `Dataset` object representing the data for evaluation.
            evaluation_scorers (list[Scorer]): A list of `Scorer` objects that calculate various metrics.
            pipeline (Model): The model or pipeline to evaluate.
        """
        self.evaluation_name = evaluation_name
        self.evaluation_dataset = evaluation_dataset
        self.evaluation_scorers = evaluation_scorers
        self.pipeline = pipeline

    def evaluate(self) -> dict:
        """Perform evaluation of the pipeline using the specified dataset and scorers.

        This method creates an `Evaluation` object, runs its asynchronous
        `evaluate` coroutine to completion with `asyncio.run`, and returns
        whatever that coroutine produced.

        Because `asyncio.run` is used, this method must be called from
        synchronous code: calling it while an event loop is already running in
        the current thread fails (and is reported as a `RuntimeError` like any
        other failure of the run).

        Returns:
            dict: The evaluation results as returned by `Evaluation.evaluate`.

        Raises:
            RuntimeError: If the evaluation run fails for any reason. The
                original exception is chained as `__cause__`.
        """
        # Built outside the `try` so that configuration errors raised by the
        # `Evaluation` constructor surface unchanged rather than being
        # reported as a failed run.
        evaluation = Evaluation(
            evaluation_name=self.evaluation_name,
            dataset=self.evaluation_dataset,
            scorers=self.evaluation_scorers,
        )

        try:
            evaluation_results = asyncio.run(evaluation.evaluate(self.pipeline))
        except Exception as exception:
            msg = f"Evaluation run failed: {exception}"
            raise RuntimeError(msg) from exception

        return evaluation_results