File size: 1,106 Bytes
1482463
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import json
from typing import List, Dict


class DataLoader:
    """Load query/answer datasets from JSON or JSON Lines files."""

    @staticmethod
    def load_data(file_path: str) -> List[Dict[str, str]]:
        """Load and validate a dataset from a JSON or JSONL file.

        Files whose path ends with ``.jsonl`` are parsed as one JSON
        document per line; anything else is parsed as a single JSON
        document.

        Expected format: [{"query": "...", "answer": "..."}, ...]

        Args:
            file_path: Path to the ``.json`` or ``.jsonl`` file.

        Returns:
            The parsed list of records, each containing at least the
            'query' and 'answer' keys.

        Raises:
            ValueError: If the top-level value is not a list, or if any
                item is not a dict with 'query' and 'answer' keys.
                Malformed JSON surfaces as ``json.JSONDecodeError``,
                which is itself a ``ValueError`` subclass.
            OSError: If the file cannot be opened.
        """
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(file_path, 'r', encoding='utf-8') as f:
            if file_path.endswith('.jsonl'):
                # Skip blank lines (commonly a trailing newline at EOF),
                # which would otherwise make json.loads fail.
                data = [json.loads(line) for line in f if line.strip()]
            else:
                data = json.load(f)

        # Validate data format
        if not isinstance(data, list):
            raise ValueError("Dataset must be a list of data items")
        for item in data:
            # The isinstance check keeps non-dict items (strings, numbers)
            # from being substring-tested or raising a stray TypeError.
            if (
                not isinstance(item, dict)
                or 'query' not in item
                or 'answer' not in item
            ):
                raise ValueError("Each data item must have 'query' and 'answer' fields")

        return data

    @staticmethod
    def load_math_dataset(file_path: str) -> List[Dict[str, str]]:
        """Load MATH or GSM8K format dataset.

        Delegates to :meth:`load_data`; no format-specific processing is
        applied, so the same query/answer validation rules hold.
        """
        return DataLoader.load_data(file_path)

    @staticmethod
    def load_mmlu_dataset(file_path: str) -> List[Dict[str, str]]:
        """Load MMLU-Pro format dataset.

        Delegates to :meth:`load_data`; no format-specific processing is
        applied, so the same query/answer validation rules hold.
        """
        return DataLoader.load_data(file_path)