|
|
def check_id(data, n, task_id):
    """Verify that the record at position ``n`` carries the expected task id.

    Used as a guard before index-based edits so that a patch written against
    one dataset layout is not applied to a differently ordered dataset.

    Args:
        data: Indexable collection of records; each record is subscripted
            with the key ``"task_id"``.
        n: Position of the record to check.
        task_id: The task id the record at ``n`` is expected to have.

    Raises:
        AssertionError: If the record's ``"task_id"`` differs from ``task_id``.
    """
    actual = data[n]["task_id"]
    # Raise explicitly instead of using an ``assert`` statement: asserts are
    # removed when Python runs with -O, which would silently disable this guard.
    if actual != task_id:
        raise AssertionError(
            f"data[{n}] has task_id {actual!r}, expected {task_id!r}"
        )
|
|
|
|
|
|
|
|
def fix(data):
    """Apply the hard-coded prompt clean-ups and record removals to ``data``.

    Two passes are made, in this order:

    1. For each listed record, the given URL is removed from its ``"prompt"``
       string.
    2. Each listed record is deleted from ``data``.

    Every step first calls ``check_id`` to confirm that the record at the
    hard-coded position has the expected ``"task_id"``.

    Args:
        data: Mutable sequence of records, modified in place.

    Returns:
        The same ``data`` object after modification.
    """
    # (position, expected task id, URL to strip from the prompt)
    link_removals = (
        (
            334,
            "Mbpp/734",
            "https://www.geeksforgeeks.org/sum-of-products-of-all-possible-subarrays/",
        ),
        (
            335,
            "Mbpp/735",
            "https://www.geeksforgeeks.org/toggle-bits-number-expect-first-last-bits/",
        ),
        (
            336,
            "Mbpp/736",
            "https://www.w3resource.com/python-exercises/data-structures-and-algorithms/python-data-structure-exercise-24.php",
        ),
        (
            338,
            "Mbpp/739",
            "https://www.geeksforgeeks.org/index-of-smallest-triangular-number-with-n-digits/",
        ),
        (
            339,
            "Mbpp/740",
            "https://www.geeksforgeeks.org/python-convert-tuple-to-adjacent-pair-dictionary/",
        ),
        (
            342,
            "Mbpp/743",
            "https://www.geeksforgeeks.org/python-program-right-rotate-list-n/",
        ),
        (
            344,
            "Mbpp/745",
            "https://www.w3resource.com/python-exercises/lambda/python-lambda-exercise-24.php",
        ),
        (
            347,
            "Mbpp/749",
            "https://www.geeksforgeeks.org/python-sort-numeric-strings-in-a-list/",
        ),
        (
            349,
            "Mbpp/751",
            "https://www.geeksforgeeks.org/how-to-check-if-a-given-array-represents-a-binary-heap/",
        ),
        (
            350,
            "Mbpp/752",
            "https://www.geeksforgeeks.org/jacobsthal-and-jacobsthal-lucas-numbers/",
        ),
        (
            351,
            "Mbpp/753",
            "https://www.geeksforgeeks.org/python-find-minimum-k-records-from-tuple-list/",
        ),
        (
            354,
            "Mbpp/757",
            "https://www.geeksforgeeks.org/python-program-to-count-the-pairs-of-reverse-strings/",
        ),
        (
            359,
            "Mbpp/763",
            "https://www.geeksforgeeks.org/find-minimum-difference-pair/",
        ),
        (
            366,
            "Mbpp/771",
            "https://www.geeksforgeeks.org/check-for-balanced-parentheses-in-an-expression/",
        ),
        (
            372,
            "Mbpp/780",
            "https://www.geeksforgeeks.org/python-combinations-of-sum-with-tuples-in-tuple-list/",
        ),
        (
            373,
            "Mbpp/781",
            "https://www.w3resource.com/python-exercises/basic/python-basic-1-exercise-24.php",
        ),
        (
            374,
            "Mbpp/782",
            "https://www.geeksforgeeks.org/sum-of-all-odd-length-subarrays/",
        ),
        (
            392,
            "Mbpp/803",
            "https://www.geeksforgeeks.org/check-if-given-number-is-perfect-square-in-cpp/",
        ),
    )

    # (position, expected task id) of records to delete. The positions are
    # listed from highest to lowest so that each deletion leaves the
    # positions of the entries still to be deleted unchanged.
    removals = (
        (375, "Mbpp/783"),
        (345, "Mbpp/746"),
        (318, "Mbpp/640"),
        (282, "Mbpp/595"),
        (270, "Mbpp/582"),
        (263, "Mbpp/574"),
        (231, "Mbpp/461"),
        (216, "Mbpp/442"),
        (212, "Mbpp/438"),
        (206, "Mbpp/431"),
        (187, "Mbpp/407"),
        (183, "Mbpp/400"),
        (180, "Mbpp/396"),
        (160, "Mbpp/295"),
        (121, "Mbpp/249"),
        (107, "Mbpp/229"),
        (94, "Mbpp/164"),
        (89, "Mbpp/143"),
        (67, "Mbpp/117"),
        (65, "Mbpp/115"),
        (37, "Mbpp/83"),
    )

    # Pass 1: strip the reference link out of each listed prompt.
    for pos, expected_id, url in link_removals:
        check_id(data, pos, expected_id)
        record = data[pos]
        record["prompt"] = record["prompt"].replace(url, "")

    # Pass 2: drop the listed records.
    for pos, expected_id in removals:
        check_id(data, pos, expected_id)
        del data[pos]

    return data
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    import json

    # Task ids whose prompts are printed after the conversion for manual review.
    TASK_INSPECT = [
        "Mbpp/734",
        "Mbpp/735",
        "Mbpp/736",
        "Mbpp/739",
        "Mbpp/740",
        "Mbpp/743",
        "Mbpp/745",
        "Mbpp/749",
        "Mbpp/751",
        "Mbpp/752",
        "Mbpp/753",
        "Mbpp/757",
        "Mbpp/763",
        "Mbpp/771",
        "Mbpp/780",
        "Mbpp/781",
        "Mbpp/782",
        "Mbpp/803",
    ]
    SOURCE_VERSION = "v0.1.0"
    TARGET_VERSION = "v0.2.0"

    def load_jsonl(path):
        """Read a JSON Lines file and return its records as a list.

        The file is decoded as UTF-8 (matching how ``evolve`` encodes its
        output) rather than with the platform default encoding.
        """
        with open(path, encoding="utf-8") as f:
            # Lines read from a file keep their newline, so a blank line is
            # "\n" and therefore truthy; strip before testing so that blank
            # lines are skipped instead of being passed to json.loads.
            return [json.loads(line) for line in f if line.strip()]

    def evolve(src_file, tgt_file):
        """Load ``src_file``, apply ``fix`` and write the result to ``tgt_file``.

        Args:
            src_file: Path of the JSON Lines file to read.
            tgt_file: Path of the JSON Lines file to write (overwritten).
        """
        data = fix(load_jsonl(src_file))
        with open(tgt_file, "wb") as f:
            for x in data:
                f.write((json.dumps(x) + "\n").encode("utf-8"))

    evolve(f"MbppPlus-{SOURCE_VERSION}.jsonl", f"MbppPlus-{TARGET_VERSION}.jsonl")

    # Re-read the freshly written file and print the inspected prompts.
    data = {x["task_id"]: x for x in load_jsonl(f"MbppPlus-{TARGET_VERSION}.jsonl")}
    for task_id in TASK_INSPECT:
        print(data[task_id]["prompt"])
        print("====================================")
|
|
|