| import json |
| import os.path as osp |
|
|
| from datasets import Dataset, DatasetDict |
|
|
| from opencompass.registry import LOAD_DATASET |
|
|
| from .base import BaseDataset |
|
|
|
|
@LOAD_DATASET.register_module()
class CMBDataset(BaseDataset):
    """Loader that builds ``val`` and ``test`` splits from a CMB data folder.

    The folder is expected to contain ``val.json`` and ``test.json``, each
    holding a JSON list of records with an ``option`` mapping.
    """

    @staticmethod
    def load(path: str):
        """Load both splits and return them as a ``DatasetDict``.

        Every record gains an ``option_str`` field: the entries of its
        ``option`` mapping rendered as ``"<key>. <text>"`` lines joined by
        newlines. Options whose text is one character or shorter are left
        out (presumably blank placeholders -- confirm against the data).

        Records of the test split additionally get ``answer`` set to the
        placeholder ``'NULL'``; validation records keep the answer stored in
        ``val.json`` so they can still be scored.

        Args:
            path: Directory containing ``val.json`` and ``test.json``.

        Returns:
            A ``DatasetDict`` with the keys ``'val'`` and ``'test'``.

        Raises:
            OSError: If one of the split files cannot be opened.
            json.JSONDecodeError: If a split file is not valid JSON.
            KeyError: If a record has no ``option`` field.
        """

        def read_split(filename: str) -> list:
            # Both splits share the same on-disk layout, so one reader
            # handles the parsing and the option formatting for each.
            with open(osp.join(path, filename), 'r', encoding='utf-8') as f:
                records = json.load(f)
            for record in records:
                record['option_str'] = '\n'.join(
                    f'{key}. {text}'
                    for key, text in record['option'].items()
                    if len(text) > 1)
            return records

        val_data = read_split('val.json')
        val_dataset = Dataset.from_list(val_data)

        test_data = read_split('test.json')
        for record in test_data:
            # The placeholder belongs on the test split only. Writing it to
            # the validation records would wipe their ground-truth answers.
            record['answer'] = 'NULL'
        test_dataset = Dataset.from_list(test_data)

        return DatasetDict({'val': val_dataset, 'test': test_dataset})
|
|