code_eval

Runtime error

jjkim committed on Oct 10, 2023

Commit

747a38b

1 Parent(s): 96c951b

fix metric info

Files changed (1) hide show

code_eval.py CHANGED Viewed

@@ -145,8 +145,11 @@ class CodeEval(evaluate.Metric):
             # This defines the format of each prediction and reference
             features=datasets.Features(
                 {
-                    "predictions": defaultdict(lambda: datasets.Value("string")),
-                    "references": defaultdict(lambda: datasets.Value("string")),
                 }
             ),
             homepage="https://github.com/openai/human-eval",
@@ -157,11 +160,10 @@ class CodeEval(evaluate.Metric):
     def _compute(
         self,
         predictions,
-        pred_key,
         pred_template,
         references,
-        ref_key,
         ref_template,
         k=[1, 10, 100],
         num_workers=4,
@@ -179,14 +181,9 @@ class CodeEval(evaluate.Metric):
                 "This metric is currently not supported on Windows."
             )
-        predictions = sorted(predictions, key=lambda x: x["id"])
-        references = sorted(references, key=lambda x: x["id"])
         with ThreadPoolExecutor(max_workers=num_workers) as executor:
             results = {}
-            for pred_d, ref_d in zip(predictions, references):
-                assert pred_d["id"] == ref_d["id"]
-                tid = pred_d["id"]
                 results[tid] = []
                 pred = pred_d[pred_key]
                 ref = ref_d[ref_key]

             # This defines the format of each prediction and reference
             features=datasets.Features(
                 {
+                    "ids": datasets.Value("string"),
+                    "predictions": datasets.Sequence(datasets.Value("string")),
+                    "pred_template": datasets.Value("string"),
+                    "references": datasets.Sequence(datasets.Value("string")),
+                    "ref_template": datasets.Value("string"),
                 }
             ),
             homepage="https://github.com/openai/human-eval",
     def _compute(
         self,
+        ids,
         predictions,
         pred_template,
         references,
         ref_template,
         k=[1, 10, 100],
         num_workers=4,
                 "This metric is currently not supported on Windows."
             )
         with ThreadPoolExecutor(max_workers=num_workers) as executor:
             results = {}
+            for tid, pred_d, ref_d in zip(ids, predictions, references):
                 results[tid] = []
                 pred = pred_d[pred_key]
                 ref = ref_d[ref_key]