leaderboard

Running

App Files Files Community

leaderboard / evaluation /evaluator.py

Ori

Update evaluation/evaluator.py

00b102e verified about 1 year ago

raw

history blame contribute delete

3.3 kB

	import json
	from evaluation.evaluate_utils.evaluate_factory import get_evaluator
	import numpy as np


	def find_isnan(samp):
	    """Return True when ``np.isnan`` reports ``samp`` as NaN, otherwise False.

	    Values that ``np.isnan`` cannot process (for example strings or None)
	    and results without a single truth value (multi-element sequences) are
	    reported as not NaN instead of raising.

	    Args:
	        samp: Any value; typically a float, string or list.

	    Returns:
	        bool: True only if ``np.isnan(samp)`` evaluates to a truthy value.
	    """
	    try:
	        return bool(np.isnan(samp))
	    except (TypeError, ValueError):
	        # TypeError: unsupported input type; ValueError: ambiguous truth
	        # value of a multi-element array.
	        return False


	def fix_ans(answer):
	    """Rewrite Python-dict-style single quotes in ``answer`` to double quotes.

	    Only the quote patterns around braces, key/value separators and item
	    separators are replaced, in a fixed order, so that a string such as
	    ``{'a': 'b'}`` becomes ``{"a": "b"}``.

	    Args:
	        answer: The text to rewrite. Non-string values are returned as-is.

	    Returns:
	        The rewritten string, or ``answer`` unchanged if it is not a string.
	    """
	    if not isinstance(answer, str):
	        return answer

	    # Order matters: the generic "': " rule must run after the quoted-value
	    # rules so it only touches keys followed by unquoted values.
	    replacements = (
	        ("{'", '{"'),
	        ("', '", '", "'),
	        ("': '", '": "'),
	        ("'}", '"}'),
	        ("': ", '": '),
	    )
	    for old, new in replacements:
	        answer = answer.replace(old, new)
	    return answer


	def parse_answer(answer):
	    """Convert a list of gold-answer items into a value plus an answer kind.

	    A single item is tried as a number first (via ``fix_number``), then as
	    JSON (after ``fix_ans`` quote repair), and otherwise kept as a string.
	    Any other list length is tried as JSON item by item and otherwise kept
	    as the original list.

	    Args:
	        answer: List of answer items.

	    Returns:
	        tuple: ``(value, kind)`` where ``kind`` is one of ``'number'``,
	        ``'json'``, ``'string'`` or ``"string list"``. For ``'json'`` the
	        value is always a list of decoded objects (an empty input list
	        yields ``([], 'json')``).
	    """
	    if len(answer) == 1:
	        only = answer[0]
	        value, is_num = fix_number(only)
	        if is_num:
	            return value, 'number'
	        try:
	            return [json.loads(fix_ans(only))], 'json'
	        except (TypeError, ValueError, RecursionError):
	            # Not a number (checked above) and not valid JSON: plain string.
	            return only, 'string'

	    try:
	        return [json.loads(fix_ans(item)) for item in answer], 'json'
	    except (TypeError, ValueError, RecursionError):
	        # At least one item is not valid JSON; keep the raw items.
	        return answer, "string list"


	def fix_number(number):
	    """Try to interpret ``number`` as a float.

	    For strings, the tokens ``$``, ``%`` and ``sqft`` are replaced by
	    spaces, surrounding whitespace is stripped, commas become dots and the
	    suffix `` square kilometers`` is removed before calling ``float``.

	    NOTE(review): every comma is turned into a dot, so ``"1,000"`` parses as
	    ``1.0``. Presumably intended for decimal commas; confirm against the
	    data before changing.

	    Args:
	        number: A string, int, or any other value.

	    Returns:
	        tuple: ``(value, is_number)``. Strings yield ``(float, True)`` on
	        success or ``(original_string, False)`` on failure. Ints yield
	        ``(float(number), True)``. Every other value (including floats and
	        bools) is returned unchanged with ``True``.
	    """
	    if isinstance(number, str):
	        cleaned = number
	        for token in ('$', '%', 'sqft'):
	            cleaned = cleaned.replace(token, ' ')
	        cleaned = cleaned.strip().replace(',', '.').replace(' square kilometers', '')
	        try:
	            return float(cleaned), True
	        except ValueError:
	            return number, False

	    # bool is a subclass of int; keep it out of the int branch so that
	    # True/False are passed through unchanged.
	    if isinstance(number, int) and not isinstance(number, bool):
	        return float(number), True

	    return number, True


	def fix_prediction(prediction, gold_answer, evaluator):
	    """Normalise a prediction and decide whether it should be evaluated.

	    Steps:
	      1. A one-element list holding an int or a digit-only string is
	         unwrapped and converted with ``fix_number``.
	      2. Any non-list prediction is passed through ``fix_number``; when the
	         evaluator is ``'json'`` it is additionally split on newlines and
	         each line decoded as JSON (falling back to wrapping the value in a
	         one-element list when that is not possible).
	      3. Empty predictions, and multi-item list predictions compared
	         against a float gold answer, are flagged as not evaluable.

	    Args:
	        prediction: The raw or pre-decoded prediction.
	        gold_answer: The parsed gold answer; only its type is inspected.
	        evaluator: Name of the answer kind (e.g. ``'json'``).

	    Returns:
	        tuple: ``(prediction, run_eval)`` where ``run_eval`` is False when
	        the prediction should be scored as wrong without evaluation.
	    """
	    if isinstance(prediction, list) and len(prediction) == 1:
	        only = prediction[0]
	        is_plain_int = isinstance(only, int) and not isinstance(only, bool)
	        if is_plain_int or (isinstance(only, str) and only.isnumeric()):
	            # fix_number returns (value, is_number); keep only the value.
	            # Storing the whole tuple would make the prediction a tuple.
	            prediction, _ = fix_number(only)

	    if not isinstance(prediction, list):
	        prediction, _ = fix_number(prediction)
	        if evaluator == 'json':
	            try:
	                prediction = [json.loads(line) for line in prediction.split('\n')]
	            except (AttributeError, TypeError, ValueError, RecursionError):
	                # Not a string, or some line is not valid JSON.
	                prediction = [prediction]

	    if hasattr(type(prediction), '__len__') and len(prediction) == 0:
	        return prediction, False

	    if isinstance(prediction, list) and len(prediction) > 1 and isinstance(gold_answer, float):
	        return prediction, False

	    return prediction, True


	def _is_missing_answer(value):
	    """Return True when ``value`` is None, an empty sized object, or NaN."""
	    if value is None:
	        return True
	    # Guard len() so unsized values (int, bool, float) do not raise.
	    if hasattr(type(value), '__len__') and len(value) == 0:
	        return True
	    return find_isnan(value)


	def question_scorer(prediction, gold_answer):
	    """Score one prediction against its gold answer.

	    The prediction is decoded as JSON when possible. The gold answer (a
	    list, or a newline-separated string whose blank lines are dropped) is
	    parsed with ``parse_answer`` to pick an evaluator. The prediction is
	    normalised with ``fix_prediction`` and then scored by the evaluator
	    returned from ``get_evaluator``.

	    Args:
	        prediction: Raw prediction, usually a string.
	        gold_answer: Gold answer as a list or a newline-separated string.

	    Returns:
	        tuple: ``(accuracy, has_ans)``. ``has_ans`` is ``0.0`` when the
	        prediction is None, empty, NaN, or a list whose items are all of
	        those, and ``1.0`` otherwise. ``accuracy`` is ``0.0`` when
	        ``fix_prediction`` reports the prediction as not evaluable,
	        otherwise whatever the evaluator returns.
	    """
	    try:
	        prediction = json.loads(prediction)
	    except (TypeError, ValueError, RecursionError):
	        # Not JSON (or not a string at all): keep the raw prediction.
	        pass

	    if isinstance(gold_answer, list):
	        answer_list = gold_answer
	    else:
	        answer_list = [line for line in gold_answer.split("\n") if line.strip()]

	    gold_answer, evaluator = parse_answer(answer_list)
	    prediction, run_eval = fix_prediction(prediction, gold_answer, evaluator)

	    has_ans = 1.0
	    if _is_missing_answer(prediction):
	        has_ans = 0.0
	    elif isinstance(prediction, list) and all(_is_missing_answer(pred) for pred in prediction):
	        has_ans = 0.0

	    if not run_eval:
	        return 0.0, has_ans

	    metric_eval = get_evaluator(evaluator)
	    accuracy = metric_eval(prediction, gold_answer)
	    return accuracy, has_ans