Multi-modal_10B_CN / llava /eval /summarize_gpt_review.py

Upload 77 files

c98c191 over 2 years ago

1.16 kB

	import json
	import os
	from collections import defaultdict

	import numpy as np

	import argparse

	def parse_args():
	    """Parse the command-line options of the review summarizer.

	    Returns:
	        argparse.Namespace with a ``dir`` attribute: the directory that is
	        scanned for review ``.jsonl`` files. Defaults to ``'.'``.
	    """
	    parser = argparse.ArgumentParser(description='ChatGPT-based QA evaluation.')
	    # Default to the current directory instead of None: os.listdir(None)
	    # already scans the current directory, but os.path.join(None, name)
	    # raises TypeError, so omitting -d used to crash on the first file.
	    parser.add_argument(
	        '-d', '--dir', default='.',
	        help='directory containing the review .jsonl files '
	             '(default: current directory)',
	    )
	    return parser.parse_args()


	def summarize_reviews(lines):
	    """Aggregate review score tuples per category.

	    Args:
	        lines: iterable of JSON strings, one review per item. Each review
	            must provide a ``category`` key and a ``tuple`` key holding
	            the list of scores. Blank items are ignored.

	    Returns:
	        dict mapping each category, plus the aggregate key ``'all'``, to a
	        ``(stats, ratio)`` pair. ``stats`` is the per-position mean of the
	        score tuples rounded to 3 decimals; ``ratio`` is
	        ``stats[1] / stats[0] * 100`` rounded to 1 decimal, or ``nan`` when
	        ``stats[0]`` is zero. Keys keep first-seen order.
	    """
	    scores = defaultdict(list)
	    for line in lines:
	        # Skip stray blank lines, which json.loads would reject.
	        if not line.strip():
	            continue
	        review = json.loads(line)
	        scores[review['category']].append(review['tuple'])
	        scores['all'].append(review['tuple'])

	    summary = {}
	    for category, tuples in scores.items():
	        stats = [round(x, 3) for x in np.asarray(tuples).mean(0).tolist()]
	        # A zero mean for the first score would otherwise abort the whole
	        # run with ZeroDivisionError; report nan for that category instead.
	        if stats[0]:
	            ratio = round(stats[1] / stats[0] * 100, 1)
	        else:
	            ratio = float('nan')
	        summary[category] = (stats, ratio)
	    return summary


	if __name__ == '__main__':
	    args = parse_args()
	    # Only review dumps are summarized: *.jsonl files whose name starts
	    # with one of the two known prefixes.
	    review_files = [
	        x for x in os.listdir(args.dir)
	        if x.endswith('.jsonl') and x.startswith(('gpt4_text', 'reviews_'))
	    ]

	    for review_file in sorted(review_files):
	        config = review_file.replace('gpt4_text_', '').replace('.jsonl', '')
	        print(f'GPT-4 vs. {config}')
	        # JSON text is UTF-8; do not depend on the platform default encoding.
	        with open(os.path.join(args.dir, review_file), encoding='utf-8') as f:
	            summary = summarize_reviews(f)
	        for k, (stats, ratio) in summary.items():
	            print(k, stats, ratio)
	        print('=================================')