File size: 1,808 Bytes
5374a2d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
# Acknowledgement: Modified from AFlow (https://github.com/geekan/MetaGPT/blob/main/metagpt/ext/aflow/scripts/optimizer_utils/evaluation_utils.py) under MIT License 

from ...evaluators.aflow_evaluator import AFlowEvaluator
from ...core.callbacks import suppress_logger_info
from ...core.logging import logger

class EvaluationUtils:
    """Run an optimizer's graph through ``AFlowEvaluator`` and record the results.

    Attributes:
        root_path: Passed to ``optimizer.data_utils.get_results_file_path`` to
            locate the results file written after every validation run.
    """

    def __init__(self, root_path: str):
        self.root_path = root_path

    async def evaluate_graph_async(self, optimizer, validation_n, data, initial=False):
        """Evaluate ``optimizer.graph`` on the validation split up to ``validation_n`` times.

        After each run a result record is appended to ``data`` (mutated in
        place) and the whole list is saved through ``optimizer.data_utils``.
        The loop stops early as soon as the evaluator reports that every case
        failed.

        Args:
            optimizer: Object exposing ``executor_llm``, ``benchmark``, ``graph``,
                ``round`` and ``data_utils``.
            validation_n: Number of validation runs to perform.
            data: List of result records; new records are appended to it.
            initial: When exactly ``False`` the records are labelled with
                ``optimizer.round + 1``; otherwise with ``optimizer.round``.

        Returns:
            The sum of the collected scores divided by ``validation_n``. Runs
            skipped by the early stop therefore count as zero. Returns ``0.0``
            without evaluating anything when ``validation_n`` is not positive.
        """
        # Nothing to run: avoid a ZeroDivisionError for 0 and keep the result
        # consistent with negative values, for which the loop is empty as well.
        if validation_n <= 0:
            return 0.0

        evaluator = AFlowEvaluator(llm=optimizer.executor_llm)
        # The round label is the same for every validation run of this call.
        cur_round = optimizer.round + 1 if initial is False else optimizer.round
        sum_score = 0

        for _ in range(validation_n):
            with suppress_logger_info():
                score, avg_cost, total_cost, all_failed = await evaluator.graph_evaluate_async(
                    optimizer.benchmark, optimizer.graph, is_test=False
                )

            # Persist after every run so partial results survive an interruption.
            data.append(optimizer.data_utils.create_result_data(cur_round, score, avg_cost, total_cost))
            result_path = optimizer.data_utils.get_results_file_path(self.root_path)
            optimizer.data_utils.save_results(result_path, data)

            sum_score += score

            if all_failed:
                logger.warning(f"All test cases failed in round {cur_round}. Stopping evaluation for this round.")
                break

        # Deliberately divided by the requested run count, not the completed one.
        return sum_score / validation_n

    async def evaluate_graph_test_async(self, optimizer):
        """Evaluate ``optimizer.graph`` once with ``is_test=True``.

        Args:
            optimizer: Object exposing ``executor_llm``, ``benchmark`` and ``graph``.

        Returns:
            A ``(score, avg_cost, total_cost)`` tuple as reported by the
            evaluator; its ``all_failed`` flag is discarded.
        """
        evaluator = AFlowEvaluator(llm=optimizer.executor_llm)
        with suppress_logger_info():
            score, avg_cost, total_cost, _all_failed = await evaluator.graph_evaluate_async(
                optimizer.benchmark, optimizer.graph, is_test=True
            )
        return score, avg_cost, total_cost