File size: 3,035 Bytes
f1f682e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import os
import re
import json
from abc import ABC, abstractmethod
from typing import List, Dict, Any, Optional
from utils import EvaluationRecord

class BaseDataset(ABC):
    """Abstract base class for evaluation datasets.

    Subclasses populate ``self.evaluation_records`` with
    :class:`EvaluationRecord` objects and implement message building and
    scoring.  A record's lifecycle: load -> inference request -> scoring
    request -> metric aggregation.
    """

    def __init__(self, **kwargs):
        # All records for this dataset; filled by load_and_prepare()
        # or load_results().
        self.evaluation_records: List[EvaluationRecord] = []
        # Arbitrary configuration retained for subclass use.
        self.kwargs = kwargs

    def __len__(self) -> int:
        """Return the number of evaluation records currently held."""
        return len(self.evaluation_records)

    @abstractmethod
    def load_and_prepare(self):
        """
        Load data and populate the self.evaluation_records list.
        Each element is an EvaluationRecord object.
        """
        pass

    @abstractmethod
    def build_message(self) -> dict:
        """Prepare the request message for inference and the format is OpenAI Chat Message Format:

        {"role": "user", "content": [{"type": "text", "text": "xxx"}, {"type": "image", "image": "xx.png"}, {"type": "audio", "audio": "xx.mp3"}]}
        """
        pass

    @abstractmethod
    def build_score_message(self, record: EvaluationRecord) -> dict:
        """Prepare the request message for scorer and the format is OpenAI Chat Message Format:

        {"role": "user", "content": [{"type": "text", "text": "xxx"}]}
        """
        pass

    @abstractmethod
    def compute_score(self, record: EvaluationRecord) -> float:
        """
        Compute score for a single completed record.

        :param record: An EvaluationRecord object with prediction filled.
        :return: Score (float).
        """
        pass

    @abstractmethod
    def compute_metrics(self) -> Dict[str, Any]:
        """Compute final aggregated metrics based on all records."""
        pass

    def save_results(self, file_path: str):
        """Save detailed results and final scores.

        :param file_path: Destination JSON path; parent directories are
            created as needed.
        """
        # os.path.dirname() returns "" for a bare filename, and
        # os.makedirs("") raises FileNotFoundError, so only create a real
        # directory.  exist_ok=True also closes the TOCTOU gap of the
        # exists()-then-makedirs() pattern.
        dir_name = os.path.dirname(file_path)
        if dir_name:
            os.makedirs(dir_name, exist_ok=True)
        EvaluationRecord.save_records_to_json(self.evaluation_records, file_path)
        print(f"Results saved to {file_path}")

    def load_results(self, file_path: str):
        """Load data from JSON file into evaluation_records.

        A missing file is tolerated: a message is printed and the current
        records are left untouched.

        :param file_path: Path to a JSON file containing a list of
            serialized records.
        """
        if not os.path.exists(file_path):
            print(f"File {file_path} does not exist")
            return

        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        # Rebuild records from scratch; optional fields fall back to the
        # same defaults used during initial preparation.
        self.evaluation_records = []
        for item in data:
            record = EvaluationRecord(
                id=item['id'],
                question=item['question'],
                message=item['message'],
                answer=item['answer'],
                response=item.get('response'),
                request_status=item.get('request_status', 'pending'),
                score_response=item.get('score_response'),
                score_status=item.get('score_status', 'pending'),
                score=item.get('score'),
                extra_info=item.get('extra_info', {})
            )
            self.evaluation_records.append(record)

        print(f"Loaded {len(self.evaluation_records)} records from {file_path}")