Handle cases where there are multiple correct refs and use the best score
Browse files- nl2bash_m.py +25 -23
nl2bash_m.py
CHANGED
|
@@ -109,37 +109,39 @@ class nl2bash_m(evaluate.Metric):
|
|
| 109 |
|
| 110 |
|
| 111 |
final_score = 0
|
|
|
|
| 112 |
|
| 113 |
-
|
| 114 |
-
if len(pred) == 0 and len(ref[0]) == 0:
|
| 115 |
score = 1
|
| 116 |
-
elif len(pred) == 0 or len(ref[0]) == 0:
|
| 117 |
score = 0
|
| 118 |
-
else:
|
| 119 |
-
|
|
|
|
|
|
|
| 120 |
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
|
| 138 |
-
|
| 139 |
-
|
|
|
|
|
|
|
| 140 |
|
| 141 |
final_score = final_score/len(predictions)
|
| 142 |
-
print("f_s: ", final_score)
|
| 143 |
-
|
| 144 |
|
| 145 |
return {"nl2bash_m": (final_score)}
|
|
|
|
| 109 |
|
| 110 |
|
| 111 |
final_score = 0
|
| 112 |
+
for pred, refs in zip(predictions, references):
|
| 113 |
|
| 114 |
+
if len(pred) == 0 and min([len(ref) for ref in refs]) == 0:
|
|
|
|
| 115 |
score = 1
|
| 116 |
+
elif len(pred) == 0 or min([len(ref) for ref in refs]) == 0:
|
| 117 |
score = 0
|
| 118 |
+
else:
|
| 119 |
+
best_score = 0
|
| 120 |
+
for ref in refs:
|
| 121 |
+
pred_words, ref_words = pred.split(), ref.split()
|
| 122 |
|
| 123 |
+
|
| 124 |
+
# Get the cmd of predicted and ref
|
| 125 |
+
cmd_corr = 1 if pred_words.pop(0)==ref_words.pop(0) else 0
|
| 126 |
|
| 127 |
+
# Get the option of predicted and ref
|
| 128 |
+
pred_option = [ x for x in pred_words if x[0] == '-']
|
| 129 |
+
ref_option = [ x for x in ref_words if x[0] == '-']
|
| 130 |
+
|
| 131 |
+
# Get the arguments of predicted and ref
|
| 132 |
+
pred_args = [ x for x in pred_words if x[0] != '-']
|
| 133 |
+
ref_args = [ x for x in ref_words if x[0] != '-']
|
| 134 |
|
| 135 |
+
# Calculate scores
|
| 136 |
+
cmd_score = cmd_weight * cmd_corr
|
| 137 |
+
opt_score = opt_weight * get_score(pred_option, ref_option)
|
| 138 |
+
arg_score = arg_weight * get_score(pred_args, ref_args)
|
| 139 |
|
| 140 |
+
score = cmd_score + opt_score + arg_score
|
| 141 |
+
best_score = max(best_score, score)
|
| 142 |
+
|
| 143 |
+
final_score += best_score
|
| 144 |
|
| 145 |
final_score = final_score/len(predictions)
|
|
|
|
|
|
|
| 146 |
|
| 147 |
return {"nl2bash_m": (final_score)}
|