hjkim00's picture
Restore all essential files - code, configs, and MBPP/HumanEval data
24c2665 verified
def check_id(data, n, task_id):
assert data[n]["task_id"] == task_id
def fix(data):
# fix: https://github.com/evalplus/evalplus/issues/210
check_id(data, 215, "Mbpp/459")
data[215][
"canonical_solution"
] = """
def remove_uppercase(str1):
return ''.join(c for c in str1 if not c.isupper())
"""
check_id(data, 55, "Mbpp/102")
data[55][
"canonical_solution"
] = """
def snake_to_camel(snake_str):
components = snake_str.split('_')
return components[0] + ''.join(x.title() for x in components[1:])
"""
check_id(data, 236, "Mbpp/559")
data[236][
"canonical_solution"
] = """
def max_sub_array_sum(arr, n):
max_so_far = arr[0] if len(arr) else 0
max_ending_here = arr[0] if len(arr) else 0
for i in range(1, n):
max_ending_here = max(arr[i], max_ending_here + arr[i])
max_so_far = max(max_so_far, max_ending_here)
return max_so_far
"""
return data
if __name__ == "__main__":
import json
TASK_INSPECT = [
"Mbpp/459",
"Mbpp/102",
"Mbpp/559",
]
SOURCE_VERSION = "v0.2.0"
TARGET_VERSION = "v0.2.1"
def evolve(src_file, tgt_file):
with open(src_file) as f:
data = [json.loads(line) for line in f.readlines() if line]
data = fix(data)
with open(tgt_file, "wb") as f:
for x in data:
f.write((json.dumps(x) + "\n").encode("utf-8"))
evolve(f"MbppPlus-{SOURCE_VERSION}.jsonl", f"MbppPlus-{TARGET_VERSION}.jsonl")
# Inspect the output of jsonl
with open(f"MbppPlus-{TARGET_VERSION}.jsonl") as f:
data = [json.loads(line) for line in f.readlines() if line]
data = {x["task_id"]: x for x in data}
for task_id in TASK_INSPECT:
print(data[task_id]["canonical_solution"])
print("====================================")