import os
import re
import json
from abc import ABC, abstractmethod
from typing import List, Dict, Any, Optional

from utils import EvaluationRecord


class BaseDataset(ABC):
    """Abstract base class for evaluation datasets.

    A dataset owns a list of ``EvaluationRecord`` objects
    (``self.evaluation_records``). Subclasses implement loading, message
    construction and scoring; this base class provides record counting and
    JSON persistence of the records.
    """

    def __init__(self, **kwargs):
        """Create an empty dataset.

        :param kwargs: Arbitrary keyword options, stored unmodified on
            ``self.kwargs`` for subclasses to read.
        """
        self.evaluation_records: List[EvaluationRecord] = []
        self.kwargs = kwargs

    def __len__(self) -> int:
        """Return the number of evaluation records currently held."""
        return len(self.evaluation_records)

    @abstractmethod
    def load_and_prepare(self):
        """
        Load data and populate the self.evaluation_records list.
        Each element is an EvaluationRecord object.
        """
        pass

    @abstractmethod
    def build_message(self) -> dict:
        """
        Prepare the request message for inference. The format is the
        OpenAI Chat Message Format:
        {"role": "user", "content": [{"type": "text", "text": "xxx"},
                                     {"type": "image", "image": "xx.png"},
                                     {"type": "audio", "audio": "xx.mp3"}]}
        """
        pass

    @abstractmethod
    def build_score_message(self, record: EvaluationRecord) -> dict:
        """
        Prepare the request message for the scorer. The format is the
        OpenAI Chat Message Format:
        {"role": "user", "content": [{"type": "text", "text": "xxx"}]}

        :param record: The EvaluationRecord to build a scoring request for.
        """
        pass

    @abstractmethod
    def compute_score(self, record: EvaluationRecord) -> float:
        """
        Compute score for a single completed record.

        :param record: An EvaluationRecord object with prediction filled.
        :return: Score (float).
        """
        pass

    @abstractmethod
    def compute_metrics(self) -> Dict[str, Any]:
        """Compute final aggregated metrics based on all records."""
        pass

    def save_results(self, file_path: str) -> None:
        """Save all evaluation records to ``file_path``.

        The parent directory of ``file_path`` is created if needed;
        serialization is delegated to
        ``EvaluationRecord.save_records_to_json``.

        :param file_path: Destination path. May be a bare filename, in which
            case the file is written relative to the current directory.
        """
        parent_dir = os.path.dirname(file_path)
        # dirname() is "" for a bare filename; os.makedirs("") would raise
        # FileNotFoundError, so only create a directory when one is named.
        if parent_dir:
            # exist_ok avoids a race between an existence check and creation.
            os.makedirs(parent_dir, exist_ok=True)
        EvaluationRecord.save_records_to_json(self.evaluation_records, file_path)
        print(f"Results saved to {file_path}")

    def load_results(self, file_path: str) -> None:
        """Load data from a JSON file into ``self.evaluation_records``.

        If ``file_path`` does not exist, a message is printed and the current
        records are left untouched. Otherwise the existing records are
        replaced by the ones read from the file.

        Each item in the JSON array must provide ``id``, ``question``,
        ``message`` and ``answer``; the remaining fields are optional and
        fall back to defaults (``'pending'`` for the two status fields,
        ``{}`` for ``extra_info``, ``None`` otherwise).

        :param file_path: Path of the JSON file to read.
        :raises KeyError: If an item lacks one of the required keys.
        """
        if not os.path.exists(file_path):
            print(f"File {file_path} does not exist")
            return

        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        self.evaluation_records = []
        for item in data:
            record = EvaluationRecord(
                id=item['id'],
                question=item['question'],
                message=item['message'],
                answer=item['answer'],
                response=item.get('response'),
                request_status=item.get('request_status', 'pending'),
                score_response=item.get('score_response'),
                score_status=item.get('score_status', 'pending'),
                score=item.get('score'),
                extra_info=item.get('extra_info', {}),
            )
            self.evaluation_records.append(record)

        print(f"Loaded {len(self.evaluation_records)} records from {file_path}")