# File size: 1,881 Bytes
# 24c2665
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
def check_id(data, n, task_id):
    """Verify that the record at position ``n`` carries the expected task id.

    Args:
        data: Indexable collection of records; each record is indexed with
            the key ``"task_id"``.
        n: Position of the record to check.
        task_id: Identifier the record at ``n`` is expected to have.

    Raises:
        AssertionError: If the record's ``"task_id"`` differs from ``task_id``.
    """
    actual = data[n]["task_id"]
    # Raise explicitly instead of using an ``assert`` statement so the check
    # is still enforced when Python runs with ``-O`` (which strips asserts),
    # and so a mismatch reports what was found.
    if actual != task_id:
        raise AssertionError(
            f"data[{n}] has task_id {actual!r}, expected {task_id!r}"
        )


def fix(data):
    """Overwrite the canonical solutions of three tasks in ``data`` in place.

    Background: https://github.com/evalplus/evalplus/issues/210

    For each patched task, ``check_id`` first confirms that the record at the
    hard-coded position has the expected task id, and only then is that
    record's ``"canonical_solution"`` entry replaced.

    Args:
        data: Indexable collection of task records (mutated in place).

    Returns:
        The same ``data`` object that was passed in.
    """
    remove_uppercase_src = """
def remove_uppercase(str1):
  return ''.join(c for c in str1 if not c.isupper())
"""

    snake_to_camel_src = """
def snake_to_camel(snake_str):
    components = snake_str.split('_')
    return components[0] + ''.join(x.title() for x in components[1:])
"""

    max_sub_array_sum_src = """
def max_sub_array_sum(arr, n):
    max_so_far = arr[0] if len(arr) else 0
    max_ending_here = arr[0] if len(arr) else 0
    for i in range(1, n):
        max_ending_here = max(arr[i], max_ending_here + arr[i])
        max_so_far = max(max_so_far, max_ending_here)
    return max_so_far
"""

    # (position in data, expected task id, replacement solution), applied in
    # this order: each entry is verified and patched before the next one.
    patches = (
        (215, "Mbpp/459", remove_uppercase_src),
        (55, "Mbpp/102", snake_to_camel_src),
        (236, "Mbpp/559", max_sub_array_sum_src),
    )
    for position, expected_id, solution in patches:
        check_id(data, position, expected_id)
        data[position]["canonical_solution"] = solution

    return data


# Script entry point: read the source-version JSONL, apply ``fix``, write the
# target-version JSONL, then print the patched solutions for manual review.
if __name__ == "__main__":
    import json

    # Task ids whose canonical solutions are printed after the rewrite.
    TASK_INSPECT = [
        "Mbpp/459",
        "Mbpp/102",
        "Mbpp/559",
    ]
    SOURCE_VERSION = "v0.2.0"
    TARGET_VERSION = "v0.2.1"

    def _load_jsonl(path):
        """Parse a JSON-Lines file into a list with one object per line.

        Lines yielded by the file keep their trailing newline, so a plain
        truthiness test never filters anything; ``strip()`` is what actually
        skips blank lines instead of handing them to ``json.loads``.
        """
        # Read as UTF-8 explicitly to match the UTF-8 bytes written below,
        # rather than depending on the platform's default encoding.
        with open(path, encoding="utf-8") as f:
            return [json.loads(line) for line in f if line.strip()]

    def evolve(src_file, tgt_file):
        """Load ``src_file``, apply ``fix``, and write the result to ``tgt_file``."""
        data = fix(_load_jsonl(src_file))
        # Binary mode keeps the line terminator a bare "\n" on every platform.
        with open(tgt_file, "wb") as f:
            for x in data:
                f.write((json.dumps(x) + "\n").encode("utf-8"))

    evolve(f"MbppPlus-{SOURCE_VERSION}.jsonl", f"MbppPlus-{TARGET_VERSION}.jsonl")

    # Inspect the output of jsonl
    data = _load_jsonl(f"MbppPlus-{TARGET_VERSION}.jsonl")

    data = {x["task_id"]: x for x in data}
    for task_id in TASK_INSPECT:
        print(data[task_id]["canonical_solution"])
        print("====================================")