if __name__ == "__main__":
    # Command-line entry point: merge extra ("plus") test inputs into the
    # HumanEval+ problem set and write the merged problems as JSONL.
    #
    # Arguments:
    #   --dataset     dataset name; only "humaneval" is supported.
    #   --plus-input  JSONL file; each line is an object with "task_id" and
    #                 "inputs" keys.
    #   --output      destination JSONL path; must not exist yet.
    import argparse
    import json
    import os
    import shutil

    from tempdir import TempDir

    parser = argparse.ArgumentParser()
    parser.add_argument("--dataset", default="humaneval", type=str)
    parser.add_argument("--plus-input", required=True, type=str)
    parser.add_argument("--output", required=True, type=str)
    args = parser.parse_args()

    # Validate explicitly instead of with `assert`: assertions are removed
    # under `python -O`, which would let an unsupported dataset or an
    # already-existing output path slip through unchecked.
    if args.dataset != "humaneval":
        parser.error(
            f"unsupported dataset {args.dataset!r}; only 'humaneval' is supported"
        )
    if os.path.exists(args.output):
        parser.error(f"{args.output} already exists!")

    with TempDir() as tempdir:
        # Build a task_id -> inputs mapping from the --plus-input file.
        plus_input = {}
        with open(args.plus_input, encoding="utf-8") as file:
            for line in file:
                # Tolerate blank lines (e.g. a trailing newline at EOF), which
                # json.loads would otherwise reject.
                if not line.strip():
                    continue
                problem = json.loads(line)
                plus_input[problem["task_id"]] = problem["inputs"]

        # Imported lazily so argument errors are reported without needing
        # evalplus to be importable.
        from evalplus.data import get_human_eval_plus

        # NOTE(review): err_incomplete=False presumably lets the loader return
        # a dataset that is still incomplete -- confirm against evalplus.data.
        problems = get_human_eval_plus(err_incomplete=False)

        # Write into the temporary directory first and move afterwards, so an
        # error while merging (e.g. a task_id missing from --plus-input, which
        # raises KeyError) does not leave a partial file at --output.
        tempf = os.path.join(tempdir, "HumanEvalPlus.jsonl")
        with open(tempf, "w", encoding="utf-8") as file:
            for problem in problems:
                problem["plus_input"] = plus_input[problem["task_id"]]
                file.write(json.dumps(problem) + "\n")

        # shutil.move instead of os.rename: the temporary directory and the
        # output path may live on different filesystems, where a plain rename
        # fails; shutil.move falls back to copy-and-delete in that case.
        shutil.move(tempf, args.output)
|
|