import json
import os

from evalplus.data import get_human_eval_plus
from evalplus.gen.util import trusted_exec

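# This script filters the HumanEvalPlus test inputs: every input is replayed
# against the task's canonical solution, and inputs the solution rejects with
# an "invalid inputs" error are dropped before a *_sanitized.jsonl copy is
# written.
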
def execute(code, input_list, entry_point) -> bool:
    # Replay the canonical solution on a single input via EvalPlus's trusted
    # executor. An input rejected by the solution's "invalid inputs" contract
    # counts as filtered; any other exception is unexpected and trips the
    # assert.
    try:
        trusted_exec(code, [input_list], entry_point)
    except Exception as e:
        assert str(e) == "invalid inputs"
        return False
    return True

def write(new_input_dict, new_input_path):
    # Append one task's sanitized inputs as a single JSON line.
    with open(new_input_path, "a") as f:
        f.write(json.dumps(new_input_dict) + "\n")

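# Each appended line has the shape
# {"task_id": "...", "inputs": [[arg1, arg2, ...], ...]},
# mirroring the records read from the --input file.
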
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--input", type=str, default="HumanEvalPlusInputs.jsonl")
    args = parser.parse_args()

    # Refuse to overwrite an existing sanitized file.
    new_input_path = args.input.replace(".jsonl", "_sanitized.jsonl")
    assert not os.path.exists(new_input_path), f"{new_input_path} already exists"

    # Load the raw inputs: one JSON object per line, keyed by task_id.
    task_inputs = {}
    with open(args.input, "r") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            plus = json.loads(line)
            task_inputs[plus["task_id"]] = plus["inputs"]

    for p in get_human_eval_plus().values():
        entry_point = p["entry_point"]
        # The runnable program is the prompt (signature + docstring) plus the
        # canonical solution body.
        code = p["prompt"] + p["canonical_solution"]
        task_id = p["task_id"]
        new_inputs = task_inputs[task_id]

        # Keep only the inputs the canonical solution accepts.
        count = 0
        new_input_dict = {"task_id": task_id, "inputs": []}
        for input_list in new_inputs:
            if execute(code, input_list, entry_point):
                new_input_dict["inputs"].append(input_list)
            else:
                count += 1
        write(new_input_dict, new_input_path)

        if count != 0:
            print(f"Task {task_id}: {count}/{len(new_inputs)} tests filtered")