File size: 1,598 Bytes
fdf190d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import json
import os

from evalplus.data import get_human_eval_plus
from evalplus.gen.util import trusted_exec


def execute(code, input_list) -> bool:
    """Report whether ``input_list`` runs cleanly against ``code``.

    Passes ``code`` to ``trusted_exec`` together with a one-element batch
    containing ``input_list`` and the module-level ``entry_point`` (assigned
    by the script's main loop before this function is called).

    Args:
        code: Source text handed to ``trusted_exec``.
        input_list: A single set of inputs; it is wrapped in a list so that
            ``trusted_exec`` receives a batch of exactly one.

    Returns:
        ``True`` if ``trusted_exec`` returned without raising, ``False`` if it
        raised an exception whose message is exactly ``"invalid inputs"``.

    Raises:
        AssertionError: If ``trusted_exec`` raised an exception with any other
            message. The original exception is attached as ``__cause__``.
    """
    try:
        trusted_exec(code, [input_list], entry_point)
    except Exception as e:
        if str(e) != "invalid inputs":
            # Raised explicitly instead of via `assert`: an assert statement
            # is removed under `python -O`, which would silently classify
            # every unrelated failure as an invalid input and drop the test.
            raise AssertionError(
                f"unexpected error while executing {entry_point!r}: {e!r}"
            ) from e
        return False
    return True


def write(new_input_dict):
    """Append ``new_input_dict`` as one JSON line to the output file.

    The destination is the module-level ``new_input_path``, which the script
    entry point assigns before this function is called. The file is opened
    in append mode, so each call adds one line.
    """
    with open(new_input_path, "a") as out_file:
        serialized = json.dumps(new_input_dict)
        out_file.write(f"{serialized}\n")


if __name__ == "__main__":
    # Script entry point: read a JSONL file of per-task inputs, run every
    # input through execute() against the task's prompt + canonical solution,
    # and write the inputs that pass to a sibling "*_sanitized.jsonl" file.
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--input", type=str, default="HumanEvalPlusInputs.jsonl")
    args = parser.parse_args()

    # NOTE: `new_input_path` (and `entry_point` below) must stay module-level
    # names; write() and execute() read them as globals.
    new_input_path = args.input.replace(".jsonl", "_sanitized.jsonl")
    # Explicit check rather than `assert`, which is stripped under
    # `python -O`. Without it, write() (append mode) would extend an existing
    # file -- including the input file itself when the --input path contains
    # no ".jsonl" and the two paths therefore coincide.
    if os.path.exists(new_input_path):
        raise FileExistsError(
            f"refusing to append to existing output file: {new_input_path}"
        )

    # Map task_id -> list of inputs, one JSON object per non-blank line.
    task_inputs = {}
    with open(args.input, "r") as input_file:  # closed deterministically
        for raw_line in input_file:
            line = raw_line.strip()
            if not line:
                continue
            plus = json.loads(line)
            task_inputs[plus["task_id"]] = plus["inputs"]

    for p in get_human_eval_plus().values():
        entry_point = p["entry_point"]
        code = p["prompt"] + p["canonical_solution"]
        task_id = p["task_id"]
        # A task absent from the input file raises KeyError here (fail loudly).
        new_inputs = task_inputs[task_id]

        # Keep only the inputs that execute() accepts, preserving their order.
        kept_inputs = [
            input_list for input_list in new_inputs if execute(code, input_list)
        ]
        count = len(new_inputs) - len(kept_inputs)

        # One output line per task, even when nothing was filtered.
        write({"task_id": task_id, "inputs": kept_inputs})
        if count != 0:
            print(f"Task {task_id}: {count}/{len(new_inputs)} tests filtered")