Spaces:
Running
Running
Update evaluation/evaluator.py
Browse files- evaluation/evaluator.py +11 -10
evaluation/evaluator.py
CHANGED
|
@@ -2,8 +2,8 @@ import json
|
|
| 2 |
from evaluation.evaluate_utils.evaluate_factory import get_evaluator
|
| 3 |
import numpy as np
|
| 4 |
|
| 5 |
-
def find_isnan(samp):
|
| 6 |
|
|
|
|
| 7 |
try:
|
| 8 |
if np.isnan(samp):
|
| 9 |
return True
|
|
@@ -11,10 +11,9 @@ def find_isnan(samp):
|
|
| 11 |
return False
|
| 12 |
except:
|
| 13 |
return False
|
| 14 |
-
|
| 15 |
|
| 16 |
-
def fix_ans(answer):
|
| 17 |
|
|
|
|
| 18 |
try:
|
| 19 |
answer = answer.replace("{'", '{"').replace("', '", '", "').replace("': '", '": "').replace("'}", '"}')
|
| 20 |
answer = answer.replace("': ", '": ')
|
|
@@ -24,7 +23,6 @@ def fix_ans(answer):
|
|
| 24 |
|
| 25 |
|
| 26 |
def parse_answer(answer):
|
| 27 |
-
|
| 28 |
if len(answer) == 1:
|
| 29 |
ans, is_num = fix_number(answer[0])
|
| 30 |
if is_num:
|
|
@@ -47,7 +45,6 @@ def parse_answer(answer):
|
|
| 47 |
|
| 48 |
|
| 49 |
def fix_number(number):
|
| 50 |
-
|
| 51 |
if type(number) == str:
|
| 52 |
copy_ans = number
|
| 53 |
copy_ans = ' '.join(' '.join(' '.join(copy_ans.split('$')).split('%')).split('sqft')).strip()
|
|
@@ -64,8 +61,8 @@ def fix_number(number):
|
|
| 64 |
|
| 65 |
|
| 66 |
def fix_prediction(prediction, gold_answer, evaluator):
|
| 67 |
-
|
| 68 |
-
|
| 69 |
prediction = fix_number(prediction[0])
|
| 70 |
|
| 71 |
if type(prediction) != list:
|
|
@@ -86,20 +83,24 @@ def fix_prediction(prediction, gold_answer, evaluator):
|
|
| 86 |
|
| 87 |
|
| 88 |
def question_scorer(prediction, gold_answer):
|
| 89 |
-
|
| 90 |
try:
|
| 91 |
prediction = json.loads(prediction)
|
| 92 |
except:
|
| 93 |
prediction = prediction
|
| 94 |
|
| 95 |
-
answer_list = [x for x in gold_answer.split("\n") if len(x.strip()) > 0] if type(
|
|
|
|
| 96 |
gold_answer, evaluator = parse_answer(answer_list)
|
| 97 |
prediction, run_eval = fix_prediction(prediction, gold_answer, evaluator)
|
| 98 |
|
| 99 |
has_ans = 1.
|
| 100 |
if (type(prediction) != float and len(prediction) == 0) or find_isnan(prediction):
|
| 101 |
has_ans = 0.
|
| 102 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
if not run_eval:
|
| 104 |
return 0., has_ans
|
| 105 |
|
|
|
|
| 2 |
from evaluation.evaluate_utils.evaluate_factory import get_evaluator
|
| 3 |
import numpy as np
|
| 4 |
|
|
|
|
| 5 |
|
| 6 |
+
def find_isnan(samp):
|
| 7 |
try:
|
| 8 |
if np.isnan(samp):
|
| 9 |
return True
|
|
|
|
| 11 |
return False
|
| 12 |
except:
|
| 13 |
return False
|
|
|
|
| 14 |
|
|
|
|
| 15 |
|
| 16 |
+
def fix_ans(answer):
|
| 17 |
try:
|
| 18 |
answer = answer.replace("{'", '{"').replace("', '", '", "').replace("': '", '": "').replace("'}", '"}')
|
| 19 |
answer = answer.replace("': ", '": ')
|
|
|
|
| 23 |
|
| 24 |
|
| 25 |
def parse_answer(answer):
|
|
|
|
| 26 |
if len(answer) == 1:
|
| 27 |
ans, is_num = fix_number(answer[0])
|
| 28 |
if is_num:
|
|
|
|
| 45 |
|
| 46 |
|
| 47 |
def fix_number(number):
|
|
|
|
| 48 |
if type(number) == str:
|
| 49 |
copy_ans = number
|
| 50 |
copy_ans = ' '.join(' '.join(' '.join(copy_ans.split('$')).split('%')).split('sqft')).strip()
|
|
|
|
| 61 |
|
| 62 |
|
| 63 |
def fix_prediction(prediction, gold_answer, evaluator):
|
| 64 |
+
if type(prediction) == list and len(prediction) == 1 and (
|
| 65 |
+
type(prediction[0]) == int or ((type(prediction[0]) == str) and prediction[0].isnumeric())):
|
| 66 |
prediction = fix_number(prediction[0])
|
| 67 |
|
| 68 |
if type(prediction) != list:
|
|
|
|
| 83 |
|
| 84 |
|
| 85 |
def question_scorer(prediction, gold_answer):
|
|
|
|
| 86 |
try:
|
| 87 |
prediction = json.loads(prediction)
|
| 88 |
except:
|
| 89 |
prediction = prediction
|
| 90 |
|
| 91 |
+
answer_list = [x for x in gold_answer.split("\n") if len(x.strip()) > 0] if type(
|
| 92 |
+
gold_answer) != list else gold_answer
|
| 93 |
gold_answer, evaluator = parse_answer(answer_list)
|
| 94 |
prediction, run_eval = fix_prediction(prediction, gold_answer, evaluator)
|
| 95 |
|
| 96 |
has_ans = 1.
|
| 97 |
if (type(prediction) != float and len(prediction) == 0) or find_isnan(prediction):
|
| 98 |
has_ans = 0.
|
| 99 |
+
|
| 100 |
+
if type(prediction) == list:
|
| 101 |
+
if all((type(pred) not in {float, int} and len(pred) == 0) or find_isnan(pred) for pred in prediction):
|
| 102 |
+
has_ans = 0
|
| 103 |
+
|
| 104 |
if not run_eval:
|
| 105 |
return 0., has_ans
|
| 106 |
|