GSheep
/

my-llava-moss2

Model card Files Files and versions

my-llava-moss2 / LLaVA-MOSS2 /add_cmmlu.py

GSheep's picture

add score:0.224 and score:0.26

2d3c5df over 1 year ago

history blame contribute delete

2.29 kB

	import pandas as pd
	import os
	import json

	# Directory holding the per-subject CMMLU CSV files.
	final_folder = 'playground/data/cmmlu'

	# Subject files to merge, processed in this order.
	selected_files = [
	    'combined_anatomy.csv',
	    'combined_ancient_chinese.csv',
	    'combined_arts.csv',
	    'combined_chinese_civil_service_exam.csv',
	    'combined_chinese_foreign_policy.csv',
	    'combined_chinese_history.csv',
	    'combined_college_education.csv',
	    'combined_college_engineering_hydrology.csv',
	    'combined_college_mathematics.csv',
	    'combined_college_medicine.csv',
	    'combined_conceptual_physics.csv',
	    'combined_electrical_engineering.csv',
	    'combined_elementary_mathematics.csv',
	    'combined_food_science.csv',
	    'combined_genetics.csv',
	    'combined_high_school_biology.csv',
	    'combined_high_school_chemistry.csv',
	    'combined_high_school_geography.csv',
	    'combined_high_school_mathematics.csv',
	    'combined_high_school_physics.csv',
	    'combined_high_school_politics.csv',
	    'combined_legal_and_moral_basis.csv',
	    'combined_management.csv',
	    'combined_marxist_theory.csv',
	    'combined_modern_chinese.csv',
	    'combined_philosophy.csv',
	    'combined_virology.csv',
	    'combined_world_history.csv',
	]


	def _format_question(row):
	    """Render one row as a four-option multiple-choice prompt."""
	    # Every option label is followed by a period, including D.
	    return (
	        f"{row['Question']}"
	        f"\nA.{row['A']}"
	        f"\nB.{row['B']}"
	        f"\nC.{row['C']}"
	        f"\nD.{row['D']}\n"
	    )


	def build_cmmlu_items(df, start_id):
	    """Convert the rows of a CMMLU frame into conversation records.

	    Args:
	        df: DataFrame with the columns 'Question', 'A', 'B', 'C', 'D' and
	            'Answer'.
	        start_id: Integer id given to the first row; each following row
	            gets the next integer.

	    Returns:
	        A list with one dict per row, in row order. Each dict has the keys
	        'id' (the id as a string), 'image' (always an empty string) and
	        'conversations' (a human turn holding the prompt followed by a gpt
	        turn holding the answer).
	    """
	    items = []
	    for offset, (_, row) in enumerate(df.iterrows()):
	        human = {'from': 'human', 'value': _format_question(row)}
	        gpt = {'from': 'gpt', 'value': f"答案是：{row['Answer']}"}
	        items.append({
	            'id': str(start_id + offset),
	            'image': "",
	            'conversations': [human, gpt],
	        })
	    return items


	def main():
	    """Prepend the selected CMMLU questions to the existing dataset file."""
	    with open('./political_data_with_extra_data.json', 'r', encoding='utf-8') as file:
	        data = json.load(file)

	    # New ids continue after the number of records already present.
	    next_id = len(data)

	    cmmlu_list = []
	    for file_name in selected_files:
	        path = os.path.join(final_folder, file_name)
	        # Read every cell as text so numeric-looking options keep their
	        # original spelling; keep_default_na=False keeps empty cells and
	        # literals such as "NA" as strings instead of turning them into NaN.
	        df = pd.read_csv(path, dtype=str, keep_default_na=False)

	        items = build_cmmlu_items(df, next_id)
	        next_id += len(items)

	        for item in items:
	            print(item)
	        cmmlu_list.extend(items)

	    # The CMMLU items come first, followed by the records loaded above.
	    data = cmmlu_list + data

	    with open('cmmlu_political_data_gaokao.json', 'w', encoding='utf-8') as file:
	        # Write the merged list as indented JSON, keeping non-ASCII text as is.
	        json.dump(data, file, ensure_ascii=False, indent=4)


	if __name__ == "__main__":
	    main()