import json
import os

import jsonlines


# Directory scanned for input files (a `test` folder inside the local
# Hugging Face hub cache, per the path itself).
dir_path = '/root/.cache/huggingface/hub/datasets--RUCAIBox--gaokao-bench/snapshots/49877cf53b6db9c24d7d285161fc12bba2f85d29/test'

# Subject names looked up as plain substrings of each file name.
subject = ['Chemistry', 'Geography', 'Math', 'History', 'Biology', 'Political', 'Chinese', 'Physics']

# Default destination of the aggregated JSON document.
OUTPUT_PATH = 'RUC_RAG.json'


def _keyword_for(filename):
    """Return the subject whose name occurs in *filename*, or None.

    If several subjects match, the one listed last in ``subject`` is
    returned; this mirrors the original loop, which overwrote the keyword
    on every match.
    """
    matched = None
    for sub in subject:
        if sub in filename:
            matched = sub
    return matched


def _extract_fields(record):
    """Return a new dict holding only the three fields kept in the output.

    Raises:
        KeyError: if *record* lacks 'question', 'answer' or 'analysis'.
    """
    return {
        'question': record['question'],
        'answer': record['answer'],
        'analysis': record['analysis'],
    }


def _load_questions(path):
    """Read the JSON Lines file at *path* and return its trimmed records."""
    questions = []
    # Plain read mode: the file is never written to, and 'r+' would fail
    # on read-only files.
    with open(path, 'r', encoding='utf-8') as handle:
        for record in jsonlines.Reader(handle):
            item = _extract_fields(record)
            print(item)  # progress/debug echo kept from the original script
            questions.append(item)
    return questions


def _collect(directory):
    """Build one ``{'keyword': ..., 'question': [...]}`` entry per input file.

    Entries whose name contains 'English' are skipped, as are directory
    entries that are not regular files. The 'keyword' key is omitted when
    no subject name occurs in the file name.
    """
    # NOTE(review): statement nesting was reconstructed from a flattened
    # source; confirm that one entry per file is the intended layout.
    collected = []
    # os.listdir() order is arbitrary; sorting keeps the output stable.
    for filename in sorted(os.listdir(directory)):
        if 'English' in filename:
            continue
        path = os.path.join(directory, filename)
        if not os.path.isfile(path):
            continue
        data_item = {}
        keyword = _keyword_for(filename)
        if keyword is not None:
            data_item['keyword'] = keyword
        data_item['question'] = _load_questions(path)
        collected.append(data_item)
    return collected


def main(source_dir=dir_path, output_path=OUTPUT_PATH):
    """Convert every eligible file in *source_dir* into one JSON file.

    Args:
        source_dir: directory containing the JSON Lines input files.
        output_path: file that receives the aggregated list, written as
            UTF-8 JSON with non-ASCII characters left unescaped.
    """
    data = _collect(source_dir)
    with open(output_path, 'w', encoding='utf-8') as out_file:
        json.dump(data, out_file, ensure_ascii=False, indent=4)


if __name__ == '__main__':
    main()