GSheep's picture
add score:0.224 and score:0.26
2d3c5df
import json
import jsonlines
import os
# Export script: reads every non-English JSON Lines file in `dir_path`, keeps
# the 'question', 'answer' and 'analysis' fields of each record, groups the
# records per input file, and writes the result to RUC_RAG.json.

# The `test` folder of the RUCAIBox/gaokao-bench dataset snapshot in the local
# Hugging Face cache.
dir_path = '/root/.cache/huggingface/hub/datasets--RUCAIBox--gaokao-bench/snapshots/49877cf53b6db9c24d7d285161fc12bba2f85d29/test'
# os.listdir() returns names in arbitrary order; sorting makes the order of
# entries in the output file reproducible between runs.
files = sorted(os.listdir(dir_path))
# Subject names searched for as substrings of each file name.
subject = ['Chemistry', 'Geography', 'Math', 'History', 'Biology', 'Political', 'Chinese', 'Physics']
data = []
for filename in files:
    # Files whose name contains 'English' are excluded from the export.
    if 'English' in filename:
        continue
    data_item = {}
    # Tag the entry with the subject found in the file name. If several
    # subjects match, the last one in `subject` wins; if none match, the
    # entry is written without a 'keyword' key.
    for sub in subject:
        if sub in filename:
            data_item['keyword'] = sub
    data_item_list = []
    path = os.path.join(dir_path, filename)
    # Read-only mode: the input is only parsed, never modified, so write
    # permission on the cached file must not be required.
    with open(path, 'r', encoding='utf-8') as fh:
        for line in jsonlines.Reader(fh):
            # A KeyError here means the record lacks one of the three fields.
            dict_item = {
                'question': line['question'],
                'answer': line['answer'],
                'analysis': line['analysis'],
            }
            # Echo each extracted record to stdout.
            print(dict_item)
            data_item_list.append(dict_item)
    # Note: the list of records is stored under the key 'question'.
    data_item['question'] = data_item_list
    data.append(data_item)
with open('RUC_RAG.json', 'w', encoding='utf-8') as out_file:
    # Write the collected list with json.dump(); ensure_ascii=False keeps
    # non-ASCII text as-is instead of \u escapes.
    json.dump(data, out_file, ensure_ascii=False, indent=4)