Handle cases where there are multiple correct refs and use the best score
Browse files- nl2bash_m.py +25 -23
nl2bash_m.py
CHANGED
|
@@ -109,37 +109,39 @@ class nl2bash_m(evaluate.Metric):
|
|
| 109 |
|
| 110 |
|
| 111 |
final_score = 0
|
|
|
|
| 112 |
|
| 113 |
-
|
| 114 |
-
if len(pred) == 0 and len(ref[0]) == 0:
|
| 115 |
score = 1
|
| 116 |
-
elif len(pred) == 0 or len(ref[0]) == 0:
|
| 117 |
score = 0
|
| 118 |
-
else:
|
| 119 |
-
|
|
|
|
|
|
|
| 120 |
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
|
| 138 |
-
|
| 139 |
-
|
|
|
|
|
|
|
| 140 |
|
| 141 |
final_score = final_score/len(predictions)
|
| 142 |
-
print("f_s: ", final_score)
|
| 143 |
-
|
| 144 |
|
| 145 |
return {"nl2bash_m": (final_score)}
|
|
|
|
| 109 |
|
| 110 |
|
| 111 |
final_score = 0
|
| 112 |
+
for pred, refs in zip(predictions, references):
|
| 113 |
|
| 114 |
+
if len(pred) == 0 and min([len(ref) for ref in refs]) == 0:
|
|
|
|
| 115 |
score = 1
|
| 116 |
+
elif len(pred) == 0 or min([len(ref) for ref in refs]) == 0:
|
| 117 |
score = 0
|
| 118 |
+
else:
|
| 119 |
+
best_score = 0
|
| 120 |
+
for ref in refs:
|
| 121 |
+
pred_words, ref_words = pred.split(), ref.split()
|
| 122 |
|
| 123 |
+
|
| 124 |
+
# Get the cmd of predicted and ref
|
| 125 |
+
cmd_corr = 1 if pred_words.pop(0)==ref_words.pop(0) else 0
|
| 126 |
|
| 127 |
+
# Get the option of predicted and ref
|
| 128 |
+
pred_option = [ x for x in pred_words if x[0] == '-']
|
| 129 |
+
ref_option = [ x for x in ref_words if x[0] == '-']
|
| 130 |
+
|
| 131 |
+
# Get the arguments of predicted and ref
|
| 132 |
+
pred_args = [ x for x in pred_words if x[0] != '-']
|
| 133 |
+
ref_args = [ x for x in ref_words if x[0] != '-']
|
| 134 |
|
| 135 |
+
# Calculate scores
|
| 136 |
+
cmd_score = cmd_weight * cmd_corr
|
| 137 |
+
opt_score = opt_weight * get_score(pred_option, ref_option)
|
| 138 |
+
arg_score = arg_weight * get_score(pred_args, ref_args)
|
| 139 |
|
| 140 |
+
score = cmd_score + opt_score + arg_score
|
| 141 |
+
best_score = max(best_score, score)
|
| 142 |
+
|
| 143 |
+
final_score += best_score
|
| 144 |
|
| 145 |
final_score = final_score/len(predictions)
|
|
|
|
|
|
|
| 146 |
|
| 147 |
return {"nl2bash_m": (final_score)}
|