DenseLabelDev / vlm /datasets /evaluation /utils.py

Upload folder using huggingface_hub

032e687 verified 11 months ago

7.1 kB

	import copy

	import numpy as np
	from collections import defaultdict
	import json
	from xtuner.utils import DEFAULT_IMAGE_TOKEN, IMAGE_TOKEN_INDEX
	from xtuner.tools.utils import is_cn_string
	from xtuner.dataset.utils import expand2square
	from PIL import Image
	import os

	def process_punctuation(inText):
	import re
	outText = inText
	punct = [
	';', r'/', '[', ']', '"', '{', '}', '(', ')', '=', '+', '\\', '_', '-',
	'>', '<', '@', '`', ',', '?', '!'
	]
	commaStrip = re.compile('(\d)(,)(\d)') # noqa: W605
	periodStrip = re.compile('(?!<=\d)(\.)(?!\d)') # noqa: W605
	for p in punct:
	if (p + ' ' in inText or ' ' + p in inText) or (re.search(
	commaStrip, inText) is not None):
	outText = outText.replace(p, '')
	else:
	outText = outText.replace(p, ' ')
	outText = periodStrip.sub('', outText, re.UNICODE)
	return outText


	def YOrN_Extraction(output):
	s = output.lower()
	words = process_punctuation(s).split()
	if 'yes' in words and 'no' not in words:
	return 'Yes'
	if 'yes' not in words and 'no' in words:
	return 'No'
	return 'Unknown'


	def MME_rating(data):
	stats = defaultdict(dict)
	lt = len(data)
	for i in range(lt):
	item = data.iloc[i]
	category = item['category']
	image_path = item['image_path']
	score = item['score']
	if image_path not in stats[category]:
	stats[category][image_path] = []
	stats[category][image_path].append(score)

	def acc(key, mode='normal'):
	res = stats[key]
	values = []
	for val in res.values():
	if mode == 'normal':
	values.extend(val)
	elif mode == 'plus':
	values.append(val[0] * val[1])
	return np.mean(values) * 100

	scores = {}
	for k in stats:
	scores[k] = acc(k) + acc(k, 'plus')

	super_cates = dict(
	perception=[
	'OCR', 'artwork', 'celebrity', 'color', 'count', 'existence',
	'landmark', 'position', 'posters', 'scene'
	],
	reasoning=['code_reasoning', 'commonsense_reasoning', 'numerical_calculation', 'text_translation']
	)

	ret = {}
	for sc, cate_list in super_cates.items():
	base = 0
	for c in cate_list:
	base += scores[c]
	ret[sc] = base
	ret.update(scores)
	return ret


	def Hallusion_rating(data):
	def calc_fAcc(data):
	res = defaultdict(list)
	lt = len(data)
	for i in range(lt):
	line = data.iloc[i]
	res[f"{line['l2-category']}_{line['set_id']}_{line['figure_id']}"].append(line['score'])
	return np.mean([np.all(x) for x in res.values()]) * 100

	def calc_qAcc(data):
	res = defaultdict(list)
	lt = len(data)
	for i in range(lt):
	line = data.iloc[i]
	res[f"{line['l2-category']}_{line['set_id']}_{line['question_id']}"].append(line['score'])
	return np.mean([np.all(x) for x in res.values()]) * 100

	def calc_aAcc(data):
	return np.mean(data['score']) * 100

	data['set_id'] = [x.split('_')[3] for x in data['index']]
	data['figure_id'] = [x.split('_')[4] for x in data['index']]
	data['question_id'] = [x.split('_')[5] for x in data['index']]

	res = dict(split=[], aAcc=[], fAcc=[], qAcc=[])
	res['split'].append('Overall')
	res['aAcc'].append(calc_aAcc(data))
	res['fAcc'].append(calc_fAcc(data))
	res['qAcc'].append(calc_qAcc(data))

	if 'category' in data:
	cates = list(set(data['category']))
	for c in cates:
	sub = data[data['category'] == c]
	res['split'].append(c)
	res['aAcc'].append(calc_aAcc(sub))
	res['fAcc'].append(calc_fAcc(sub))
	res['qAcc'].append(calc_qAcc(sub))

	if 'l2-category' in data:
	cates = list(set(data['l2-category']))
	for c in cates:
	sub = data[data['l2-category'] == c]
	res['split'].append(c)
	res['aAcc'].append(calc_aAcc(sub))
	res['fAcc'].append(calc_fAcc(sub))
	res['qAcc'].append(calc_qAcc(sub))
	return res


	def load_jsonl(json_file):
	with open(json_file) as f:
	lines = f.readlines()
	data = []
	for line in lines:
	data.append(json.loads(line))
	return data

	def custom_data_process(self, data, return_ori_image=False):
	metainfo = self.metainfo
	data_dict = {'img_id': data['img_id']}
	# 1 prepare text, the text only contain the <image> and text prompts
	# so, please add your template in the model.predict_forward()
	if metainfo['name'] == 'multiple_choice':
	# MultipleChoiceDataset
	data_dict['index'] = data['index']
	if data['context'] is not None:
	text = data['context'] + '\n' + data['question'] + '\n' + data['options']
	else:
	text = data['question'] + '\n' + data['options']
	text = DEFAULT_IMAGE_TOKEN + '\n' + text

	if is_cn_string(text):
	text = text + '请直接回答选项字母。'
	else:
	text = text + ("Answer with the option's letter from the " 'given choices directly.')
	elif metainfo['name'] in ['chartqa', 'gvqa']:
	# TODO prompt are different of vlmevalkit
	text = data['question'] + '\nAnswer the question using a single word or phrase.'
	text = DEFAULT_IMAGE_TOKEN + '\n' + text
	elif metainfo['name'] == 'tallyqa':
	text = data['question']
	text = text + "\nAnswer the question using a single number."
	text = DEFAULT_IMAGE_TOKEN + '\n' + text
	elif metainfo['name'] in ['hallusion', 'pope']:
	# TODO prompt are different of vlmevalkit
	text = data['question'] + '\nPlease answer the question with yes or no.'
	text = DEFAULT_IMAGE_TOKEN + '\n' + text
	else:
	text = data['question']
	if metainfo['name'] == 'mme':
	text = data['question'].replace('Please answer yes or no.',
	'Please answer the question only a single word yes or no.')
	text = DEFAULT_IMAGE_TOKEN + '\n' + text

	# 3 process image
	# if metainfo['name'] in ['mme', 'textvqa', 'gqa', 'tallyqa']:
	if metainfo['name'] in ['textvqa', 'gqa', 'tallyqa']:
	# MMEDataset or TextVQADataset
	image_folder = self.image_folder
	image = Image.open(os.path.join(image_folder, data['image_path'])).convert('RGB')
	else:
	image = self.get_image(data['img']).convert('RGB')
	ori_image = copy.deepcopy(image)
	ori_width, ori_height = image.size

	if self.pad_image_to_square:
	image = expand2square(image, tuple(int(x * 255) for x in self.image_processor.image_mean))

	image = self.image_processor.preprocess(
	image, return_tensors='pt')['pixel_values'][0]

	data_dict['pixel_values'] = image
	data_dict['text_prompts'] = text
	data_dict['ori_image_size'] = (ori_width, ori_height)
	if return_ori_image:
	data_dict['ori_image'] = ori_image
	return data_dict