my-llava-moss2 / LLaVA-MOSS2 /add_cmmlu.py
GSheep's picture
add score:0.224 and score:0.26
2d3c5df
import pandas as pd
import os
import json
# Existing conversation dataset that the CMMLU records are merged into.
INPUT_PATH = './political_data_with_extra_data.json'
# Folder holding the per-subject CMMLU CSV files.
CMMLU_FOLDER = 'playground/data/cmmlu'
# Destination of the merged dataset.
OUTPUT_PATH = 'cmmlu_political_data_gaokao.json'
# Subject files to convert; each is read with the columns
# Question, A, B, C, D and Answer.
SELECTED_FILES = [
    'combined_anatomy.csv',
    'combined_ancient_chinese.csv',
    'combined_arts.csv',
    'combined_chinese_civil_service_exam.csv',
    'combined_chinese_foreign_policy.csv',
    'combined_chinese_history.csv',
    'combined_college_education.csv',
    'combined_college_engineering_hydrology.csv',
    'combined_college_mathematics.csv',
    'combined_college_medicine.csv',
    'combined_conceptual_physics.csv',
    'combined_electrical_engineering.csv',
    'combined_elementary_mathematics.csv',
    'combined_food_science.csv',
    'combined_genetics.csv',
    'combined_high_school_biology.csv',
    'combined_high_school_chemistry.csv',
    'combined_high_school_geography.csv',
    'combined_high_school_mathematics.csv',
    'combined_high_school_physics.csv',
    'combined_high_school_politics.csv',
    'combined_legal_and_moral_basis.csv',
    'combined_management.csv',
    'combined_marxist_theory.csv',
    'combined_modern_chinese.csv',
    'combined_philosophy.csv',
    'combined_virology.csv',
    'combined_world_history.csv',
]


def format_question(row):
    """Return the multiple-choice prompt text for one CMMLU row.

    Args:
        row: Mapping (dict or pandas Series) with the keys
            'Question', 'A', 'B', 'C' and 'D'.

    Returns:
        The question followed by the four options, one per line, each
        prefixed with its letter and a dot, ending with a newline.
    """
    # f-strings stringify each cell, so a numeric option cannot raise
    # TypeError the way plain ``str + value`` concatenation does.
    return (
        f"{row['Question']}\n"
        f"A.{row['A']}\nB.{row['B']}\nC.{row['C']}\nD.{row['D']}\n"
    )


def build_record(record_id, row):
    """Build one conversation record from a CMMLU row.

    Args:
        record_id: Integer id; stored as a string in the record.
        row: Mapping with the keys read by ``format_question`` plus 'Answer'.

    Returns:
        A dict with 'id', an empty 'image' and a two-turn 'conversations'
        list (human question, gpt answer).
    """
    return {
        'id': str(record_id),
        'image': "",
        'conversations': [
            {'from': 'human', 'value': format_question(row)},
            {'from': 'gpt', 'value': f"答案是:{row['Answer']}"},
        ],
    }


def load_cmmlu_records(folder, file_names, start_id):
    """Convert every row of the given CSV files into conversation records.

    Args:
        folder: Directory containing the CSV files.
        file_names: CSV file names to read, in order.
        start_id: Id assigned to the first record; later records count up.

    Returns:
        List of records in file order, then row order.
    """
    records = []
    next_id = start_id
    for file_name in file_names:
        path = os.path.join(folder, file_name)
        # Read every cell as text: keeps numeric options exactly as written
        # and stops values such as "NA" or empty cells from becoming NaN.
        df = pd.read_csv(path, dtype=str, keep_default_na=False)
        for _, row in df.iterrows():
            record = build_record(next_id, row)
            print(record)
            records.append(record)
            next_id += 1
    return records


def main():
    """Prepend the CMMLU records to the existing dataset and write the result."""
    with open(INPUT_PATH, 'r', encoding='utf-8') as file:
        data = json.load(file)

    # New ids continue after the number of existing entries.
    cmmlu_list = load_cmmlu_records(CMMLU_FOLDER, SELECTED_FILES, len(data))
    merged = cmmlu_list + data

    with open(OUTPUT_PATH, 'w', encoding='utf-8') as file:
        # ensure_ascii=False keeps the Chinese text readable in the output.
        json.dump(merged, file, ensure_ascii=False, indent=4)


if __name__ == '__main__':
    main()