import numpy as np
def ranking_metric(evalpred):
    """Pairwise ranking accuracy for word-legibility comparisons.

    Args:
        evalpred: a (predictions, labels) pair where predictions is itself a
            pair of per-example logit arrays (scores for word 0, scores for
            word 1) and labels is an integer array with the encoding:
              0 or 1 -> word 0 / word 1 is more legible (other unknown)
              2      -> both words are equally legible
              3      -> neither word is legible
            Only labels 0 and 1 define a rankable pair; 2 and 3 are skipped.

    Returns:
        {'accuracy': float} -- fraction of rankable pairs where the preferred
        word's score is >= the other word's score (ties count as correct).
        Returns 0.0 when there are no rankable pairs (the original code
        raised ZeroDivisionError in that case).
    """
    scores0 = evalpred[0][0]
    scores1 = evalpred[0][1]
    labels = evalpred[1]
    pairs_evaluated = 0
    pairs_correct = 0
    # Sigmoid is kept deliberately: although it is monotonic, it saturates to
    # exactly 1.0 (or 0.0) for extreme logits, which can create ties that a
    # raw-score comparison would not -- so it is not a pure no-op here.
    scores0 = 1 / (1 + np.exp(-scores0))
    scores1 = 1 / (1 + np.exp(-scores1))
    for i in range(scores0.shape[0]):
        if labels[i] >= 2:
            continue  # label 2/3: no preference between the words, not rankable
        pairs_evaluated += 1
        if labels[i] == 0 and scores0[i] >= scores1[i]:
            pairs_correct += 1
        elif labels[i] == 1 and scores1[i] >= scores0[i]:
            pairs_correct += 1
    # Guard: batches with no rankable pairs previously crashed with
    # ZeroDivisionError; report 0.0 accuracy instead.
    if pairs_evaluated == 0:
        return {'accuracy': 0.0}
    return {'accuracy': pairs_correct / pairs_evaluated}
def binary_classification_metric(evalpred):
    """Per-word binary classification metrics for legibility predictions.

    Args:
        evalpred: a (predictions, labels) pair where predictions is a pair of
            per-example logit arrays (word 0 scores, word 1 scores) and labels
            encodes:
              0 or 1 -> word 0 / word 1 is more legible (the other is unknown,
                        so only the preferred word is scored: one word counted)
              2      -> both words legible (both scored as positives)
              3      -> neither word legible (both scored as negatives)

    Returns:
        dict with 'precision', 'recall', 'accuracy', 'f1_score'. Each divisor
        carries a 1e-6 epsilon so empty batches yield 0.0 rather than raising.
        A probability of exactly 0.5 counts as a negative prediction.
    """
    raw0 = evalpred[0][0]
    raw1 = evalpred[0][1]
    labels = evalpred[1]
    tp = fp = fn = tn = 0
    n_words = 0
    prob0 = 1 / (1 + np.exp(-raw0))
    prob1 = 1 / (1 + np.exp(-raw1))
    for idx in range(prob0.shape[0]):
        lab = labels[idx]
        # Labels 0/1 contribute one scored word; labels 2/3 contribute both.
        n_words += 1 if lab < 2 else 2
        if lab == 0 or lab == 1:
            # Only the known-more-legible word is evaluated, as a positive.
            preferred = prob0[idx] if lab == 0 else prob1[idx]
            if preferred > 0.5:
                tp += 1
            else:
                fn += 1
        elif lab == 2:
            # Both words are legible: each should be predicted positive.
            for p in (prob0[idx], prob1[idx]):
                if p > 0.5:
                    tp += 1
                else:
                    fn += 1
        elif lab == 3:
            # Neither word is legible: each should be predicted negative.
            for p in (prob0[idx], prob1[idx]):
                if p < 0.5:
                    tn += 1
                else:
                    fp += 1
    precision = tp / (tp + fp + 1e-6)
    recall = tp / (tp + fn + 1e-6)
    accuracy = (tp + tn) / (n_words + 1e-6)
    f1_score = 2 * precision * recall / (precision + recall + 1e-6)
    return {'precision': precision, 'recall': recall, 'accuracy': accuracy, 'f1_score': f1_score}