File size: 1,881 Bytes
24c2665 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 |
def check_id(data, n, task_id):
    """Verify that the record at position ``n`` of ``data`` has the expected id.

    Args:
        data: Indexable collection of task records; each record is looked up
            with the ``"task_id"`` key.
        n: Position of the record to check.
        task_id: Task id expected at that position.

    Raises:
        AssertionError: If the stored task id differs from ``task_id``.
    """
    actual = data[n]["task_id"]
    if actual == task_id:
        return
    # Raise explicitly instead of using an ``assert`` statement: asserts are
    # removed when Python runs with -O, which would silently disable this
    # guard. The exception type stays AssertionError for existing callers.
    raise AssertionError(
        f"data[{n}] has task_id {actual!r}, expected {task_id!r}"
    )
def fix(data):
    """Overwrite the canonical solutions of Mbpp/459, Mbpp/102 and Mbpp/559.

    Each patch names the position of the record and the task id expected
    there; the id is verified with ``check_id`` before that record is
    modified, so a shifted or reordered dataset is rejected instead of
    patching the wrong task.

    Args:
        data: Indexable collection of task records. Records are modified in
            place through their ``"canonical_solution"`` key.

    Returns:
        The same ``data`` object that was passed in.

    Raises:
        AssertionError: Propagated from ``check_id`` when a record does not
            carry the expected task id. Patches listed before the failing
            one have already been applied at that point.
    """
    # fix: https://github.com/evalplus/evalplus/issues/210
    # The solutions are stored as source text, so the function bodies inside
    # the strings must keep valid Python indentation.
    patches = (
        (
            215,
            "Mbpp/459",
            """
def remove_uppercase(str1):
    return ''.join(c for c in str1 if not c.isupper())
""",
        ),
        (
            55,
            "Mbpp/102",
            """
def snake_to_camel(snake_str):
    components = snake_str.split('_')
    return components[0] + ''.join(x.title() for x in components[1:])
""",
        ),
        (
            236,
            "Mbpp/559",
            """
def max_sub_array_sum(arr, n):
    max_so_far = arr[0] if len(arr) else 0
    max_ending_here = arr[0] if len(arr) else 0
    for i in range(1, n):
        max_ending_here = max(arr[i], max_ending_here + arr[i])
        max_so_far = max(max_so_far, max_ending_here)
    return max_so_far
""",
        ),
    )
    # Check-then-assign per patch, in the listed order.
    for index, task_id, solution in patches:
        check_id(data, index, task_id)
        data[index]["canonical_solution"] = solution
    return data
if __name__ == "__main__":
    import json

    # Tasks whose patched canonical solution is printed after the rewrite.
    TASK_INSPECT = [
        "Mbpp/459",
        "Mbpp/102",
        "Mbpp/559",
    ]
    SOURCE_VERSION = "v0.2.0"
    TARGET_VERSION = "v0.2.1"

    def read_jsonl(path):
        """Return the JSON objects stored one per line in ``path``."""
        # The writer below emits UTF-8, so read with the same encoding rather
        # than the platform default.
        with open(path, encoding="utf-8") as f:
            # Lines read from a file keep their trailing newline, so a bare
            # ``if line`` is always true; strip before testing so blank lines
            # are skipped instead of crashing json.loads.
            return [json.loads(line) for line in f if line.strip()]

    def evolve(src_file, tgt_file):
        """Apply ``fix`` to the records of ``src_file`` and write ``tgt_file``."""
        data = fix(read_jsonl(src_file))
        # Binary mode keeps "\n" line endings untouched on every platform.
        with open(tgt_file, "wb") as f:
            for x in data:
                f.write((json.dumps(x) + "\n").encode("utf-8"))

    evolve(f"MbppPlus-{SOURCE_VERSION}.jsonl", f"MbppPlus-{TARGET_VERSION}.jsonl")

    # Inspect the output of jsonl
    data = {x["task_id"]: x for x in read_jsonl(f"MbppPlus-{TARGET_VERSION}.jsonl")}
    for task_id in TASK_INSPECT:
        print(data[task_id]["canonical_solution"])
        print("====================================")
|